1 //===-- AArch64InstrNEON.td - NEON support for AArch64 -----*- tablegen -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file describes the AArch64 NEON instruction set.
12 //===----------------------------------------------------------------------===//
14 //===----------------------------------------------------------------------===//
15 // NEON-specific DAG Nodes.
16 //===----------------------------------------------------------------------===//
// Custom SelectionDAG nodes for NEON immediate moves and vector compares.
// Each SDTypeProfile below encodes (num results, num operands, constraints).
18 // (outs Result), (ins Imm, OpCmode)
19 def SDT_Neon_movi : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
// MOVI/MVNI share the same profile: an 8-bit immediate plus a cmode selector,
// both carried as i32 operands.
21 def Neon_movi : SDNode<"AArch64ISD::NEON_MOVIMM", SDT_Neon_movi>;
23 def Neon_mvni : SDNode<"AArch64ISD::NEON_MVNIMM", SDT_Neon_movi>;
25 // (outs Result), (ins Imm)
26 def Neon_fmovi : SDNode<"AArch64ISD::NEON_FMOVIMM", SDTypeProfile<1, 1,
27 [SDTCisVec<0>, SDTCisVT<1, i32>]>>;
29 // (outs Result), (ins LHS, RHS, CondCode)
30 def Neon_cmp : SDNode<"AArch64ISD::NEON_CMP", SDTypeProfile<1, 3,
31 [SDTCisVec<0>, SDTCisSameAs<1, 2>]>>;
// Compare-against-zero: operand 2 is the 0 / 0.0 constant, so only operand 1
// is constrained to be a vector (no SameAs constraint possible).
33 // (outs Result), (ins LHS, 0/0.0 constant, CondCode)
34 def Neon_cmpz : SDNode<"AArch64ISD::NEON_CMPZ", SDTypeProfile<1, 3,
35 [SDTCisVec<0>, SDTCisVec<1>]>>;
// Bitwise test (CMTST): result lane is all-ones if (LHS & RHS) != 0.
37 // (outs Result), (ins LHS, RHS)
38 def Neon_tst : SDNode<"AArch64ISD::NEON_TST", SDTypeProfile<1, 2,
39 [SDTCisVec<0>, SDTCisSameAs<1, 2>]>>;
// Saturating-shift, permute, reverse, dup and extract DAG nodes.
// NOTE(review): several SDTypeProfile constraint lists below appear truncated
// mid-statement (no closing "]>;" before the next def) — lines look to have
// been dropped from this copy; restore from upstream before building.
41 def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
43 def Neon_sqrshlImm : SDNode<"AArch64ISD::NEON_QSHLs", SDTARMVSH>;
44 def Neon_uqrshlImm : SDNode<"AArch64ISD::NEON_QSHLu", SDTARMVSH>;
// Two-input permutes (UZP/ZIP/TRN): result and both inputs share a type.
// NOTE(review): SDTPERMUTE's constraint list also looks truncated here.
46 def SDTPERMUTE : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
48 def Neon_uzp1 : SDNode<"AArch64ISD::NEON_UZP1", SDTPERMUTE>;
49 def Neon_uzp2 : SDNode<"AArch64ISD::NEON_UZP2", SDTPERMUTE>;
50 def Neon_zip1 : SDNode<"AArch64ISD::NEON_ZIP1", SDTPERMUTE>;
51 def Neon_zip2 : SDNode<"AArch64ISD::NEON_ZIP2", SDTPERMUTE>;
52 def Neon_trn1 : SDNode<"AArch64ISD::NEON_TRN1", SDTPERMUTE>;
53 def Neon_trn2 : SDNode<"AArch64ISD::NEON_TRN2", SDTPERMUTE>;
// One-input shuffles (REV16/32/64): result type equals the input type.
55 def SDTVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
56 def Neon_rev64 : SDNode<"AArch64ISD::NEON_REV64", SDTVSHUF>;
57 def Neon_rev32 : SDNode<"AArch64ISD::NEON_REV32", SDTVSHUF>;
58 def Neon_rev16 : SDNode<"AArch64ISD::NEON_REV16", SDTVSHUF>;
// NOTE(review): Neon_vdup's profile is also cut short in this copy.
59 def Neon_vdup : SDNode<"AArch64ISD::NEON_VDUP", SDTypeProfile<1, 1,
// DUPLANE: operand 1 is the source vector, operand 2 the i64 lane index.
61 def Neon_vduplane : SDNode<"AArch64ISD::NEON_VDUPLANE", SDTypeProfile<1, 2,
62 [SDTCisVec<0>, SDTCisVec<1>, SDTCisVT<2, i64>]>>;
// EXT: two same-typed vector inputs plus an i64 byte-shift amount.
63 def Neon_vextract : SDNode<"AArch64ISD::NEON_VEXTRACT", SDTypeProfile<1, 3,
64 [SDTCisVec<0>, SDTCisSameAs<0, 1>,
65 SDTCisSameAs<0, 2>, SDTCisVT<3, i64>]>>;
// AssertSext/AssertZext wrappers used when matching already-extended values.
67 def SDT_assertext : SDTypeProfile<1, 1,
68 [SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 0>]>;
69 def assertsext : SDNode<"ISD::AssertSext", SDT_assertext>;
70 def assertzext : SDNode<"ISD::AssertZext", SDT_assertext>;
72 //===----------------------------------------------------------------------===//
73 // Addressing-mode instantiations
74 //===----------------------------------------------------------------------===//
// Load/store pattern multiclasses for 64-bit and 128-bit NEON vector types,
// built on ls_neutral_pats with the appropriate uimm12 scale and alignment.
// NOTE(review): both multiclass bodies below end mid-argument-list (the
// trailing "Ty>;" and closing "}" lines are missing from this copy).
76 multiclass ls_64_pats<dag address, dag Base, dag Offset, ValueType Ty> {
77 defm : ls_neutral_pats<LSFP64_LDR, LSFP64_STR, Base,
// Substitute the placeholder OFFSET/ALIGN tokens with the dword-sized
// (8-byte) immediate operand and 8-byte minimum alignment.
78 !foreach(decls.pattern, Offset,
79 !subst(OFFSET, dword_uimm12, decls.pattern)),
80 !foreach(decls.pattern, address,
81 !subst(OFFSET, dword_uimm12,
82 !subst(ALIGN, min_align8, decls.pattern))),
86 multiclass ls_128_pats<dag address, dag Base, dag Offset, ValueType Ty> {
87 defm : ls_neutral_pats<LSFP128_LDR, LSFP128_STR, Base,
// Same substitution for the qword-sized (16-byte) case.
88 !foreach(decls.pattern, Offset,
89 !subst(OFFSET, qword_uimm12, decls.pattern)),
90 !foreach(decls.pattern, address,
91 !subst(OFFSET, qword_uimm12,
92 !subst(ALIGN, min_align16, decls.pattern))),
// Instantiate the load/store patterns for every legal 64-bit and 128-bit
// NEON vector type.
96 multiclass uimm12_neon_pats<dag address, dag Base, dag Offset> {
97 defm : ls_64_pats<address, Base, Offset, v8i8>;
98 defm : ls_64_pats<address, Base, Offset, v4i16>;
99 defm : ls_64_pats<address, Base, Offset, v2i32>;
100 defm : ls_64_pats<address, Base, Offset, v1i64>;
101 defm : ls_64_pats<address, Base, Offset, v2f32>;
102 defm : ls_64_pats<address, Base, Offset, v1f64>;
104 defm : ls_128_pats<address, Base, Offset, v16i8>;
105 defm : ls_128_pats<address, Base, Offset, v8i16>;
106 defm : ls_128_pats<address, Base, Offset, v4i32>;
107 defm : ls_128_pats<address, Base, Offset, v2i64>;
108 defm : ls_128_pats<address, Base, Offset, v4f32>;
109 defm : ls_128_pats<address, Base, Offset, v2f64>;
// Constant-pool addressing: ADRP to form the page, LDR/STR with the low-12
// offset.
112 defm : uimm12_neon_pats<(A64WrapperSmall
113 tconstpool:$Hi, tconstpool:$Lo12, ALIGN),
114 (ADRPxi tconstpool:$Hi), (i64 tconstpool:$Lo12)>;
116 //===----------------------------------------------------------------------===//
118 //===----------------------------------------------------------------------===//
// Three-operand same-type instructions over byte lanes only: emits the _8B
// (64-bit register) and _16B (128-bit register) variants. "u" and "size"
// feed straight into the NeonI_3VSame encoding; the leading q bit selects
// the register width.
// NOTE(review): the flags/itinerary line and closing braces of each def
// appear to be missing from this copy of the file.
120 multiclass NeonI_3VSame_B_sizes<bit u, bits<2> size, bits<5> opcode,
121 string asmop, SDPatternOperator opnode8B,
122 SDPatternOperator opnode16B,
123 bit Commutable = 0> {
124 let isCommutable = Commutable in {
125 def _8B : NeonI_3VSame<0b0, u, size, opcode,
126 (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
127 asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
128 [(set (v8i8 VPR64:$Rd),
129 (v8i8 (opnode8B (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))],
132 def _16B : NeonI_3VSame<0b1, u, size, opcode,
133 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
134 asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
135 [(set (v16i8 VPR128:$Rd),
136 (v16i8 (opnode16B (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))],
// Three-operand same-type instructions over halfword (size=0b01) and word
// (size=0b10) lanes: _4H/_8H and _2S/_4S variants.
// NOTE(review): flag/closing lines of each def are missing from this copy.
142 multiclass NeonI_3VSame_HS_sizes<bit u, bits<5> opcode,
143 string asmop, SDPatternOperator opnode,
144 bit Commutable = 0> {
145 let isCommutable = Commutable in {
146 def _4H : NeonI_3VSame<0b0, u, 0b01, opcode,
147 (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
148 asmop # "\t$Rd.4h, $Rn.4h, $Rm.4h",
149 [(set (v4i16 VPR64:$Rd),
150 (v4i16 (opnode (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))))],
153 def _8H : NeonI_3VSame<0b1, u, 0b01, opcode,
154 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
155 asmop # "\t$Rd.8h, $Rn.8h, $Rm.8h",
156 [(set (v8i16 VPR128:$Rd),
157 (v8i16 (opnode (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))))],
160 def _2S : NeonI_3VSame<0b0, u, 0b10, opcode,
161 (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
162 asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
163 [(set (v2i32 VPR64:$Rd),
164 (v2i32 (opnode (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))))],
167 def _4S : NeonI_3VSame<0b1, u, 0b10, opcode,
168 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
169 asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
170 [(set (v4i32 VPR128:$Rd),
171 (v4i32 (opnode (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))))],
// Extends the H/S multiclass with the byte-lane variants (_8B/_16B,
// size=0b00), giving B/H/S coverage for one SDPatternOperator.
// NOTE(review): flag/closing lines of each def are missing from this copy.
175 multiclass NeonI_3VSame_BHS_sizes<bit u, bits<5> opcode,
176 string asmop, SDPatternOperator opnode,
178 : NeonI_3VSame_HS_sizes<u, opcode, asmop, opnode, Commutable> {
179 let isCommutable = Commutable in {
180 def _8B : NeonI_3VSame<0b0, u, 0b00, opcode,
181 (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
182 asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
183 [(set (v8i8 VPR64:$Rd),
184 (v8i8 (opnode (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))],
187 def _16B : NeonI_3VSame<0b1, u, 0b00, opcode,
188 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
189 asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
190 [(set (v16i8 VPR128:$Rd),
191 (v16i8 (opnode (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))],
// Extends the B/H/S multiclass with the doubleword variant (_2D, size=0b11,
// 128-bit registers only — there is no 64-bit 1D form here).
// NOTE(review): flag/closing lines of the def are missing from this copy.
196 multiclass NeonI_3VSame_BHSD_sizes<bit u, bits<5> opcode,
197 string asmop, SDPatternOperator opnode,
199 : NeonI_3VSame_BHS_sizes<u, opcode, asmop, opnode, Commutable> {
200 let isCommutable = Commutable in {
201 def _2D : NeonI_3VSame<0b1, u, 0b11, opcode,
202 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
203 asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
204 [(set (v2i64 VPR128:$Rd),
205 (v2i64 (opnode (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))))],
210 // Multiclass NeonI_3VSame_SD_sizes: Operand types are floating point types,
211 // but Result types can be integer or floating point types.
// The result types are parameters (ResTy2S/4S/2D) because FP compares
// produce integer mask vectors while FP arithmetic produces FP vectors.
// The "size" bit is combined with a fixed low bit: {size,0} for S lanes,
// {size,1} for the D lane.
// NOTE(review): flag/closing lines of each def are missing from this copy.
212 multiclass NeonI_3VSame_SD_sizes<bit u, bit size, bits<5> opcode,
213 string asmop, SDPatternOperator opnode2S,
214 SDPatternOperator opnode4S,
215 SDPatternOperator opnode2D,
216 ValueType ResTy2S, ValueType ResTy4S,
217 ValueType ResTy2D, bit Commutable = 0> {
218 let isCommutable = Commutable in {
219 def _2S : NeonI_3VSame<0b0, u, {size, 0b0}, opcode,
220 (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
221 asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
222 [(set (ResTy2S VPR64:$Rd),
223 (ResTy2S (opnode2S (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))))],
226 def _4S : NeonI_3VSame<0b1, u, {size, 0b0}, opcode,
227 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
228 asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
229 [(set (ResTy4S VPR128:$Rd),
230 (ResTy4S (opnode4S (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))))],
233 def _2D : NeonI_3VSame<0b1, u, {size, 0b1}, opcode,
234 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
235 asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
236 [(set (ResTy2D VPR128:$Rd),
237 (ResTy2D (opnode2D (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))))],
242 //===----------------------------------------------------------------------===//
243 // Instruction Definitions
244 //===----------------------------------------------------------------------===//
246 // Vector Arithmetic Instructions
248 // Vector Add (Integer and Floating-Point)
// Integer add is commutable (trailing 1); FADD likewise.
250 defm ADDvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b10000, "add", add, 1>;
251 defm FADDvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11010, "fadd", fadd, fadd, fadd,
252 v2f32, v4f32, v2f64, 1>;
254 // Vector Sub (Integer and Floating-Point)
// Subtraction is not commutable (trailing 0).
256 defm SUBvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b10000, "sub", sub, 0>;
257 defm FSUBvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11010, "fsub", fsub, fsub, fsub,
258 v2f32, v4f32, v2f64, 0>;
260 // Vector Multiply (Integer and Floating-Point)
// Integer MUL has no doubleword form, hence BHS rather than BHSD.
262 defm MULvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10011, "mul", mul, 1>;
263 defm FMULvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11011, "fmul", fmul, fmul, fmul,
264 v2f32, v4f32, v2f64, 1>;
266 // Vector Multiply (Polynomial)
// PMUL exists only for byte lanes; reuses the ARM NEON polynomial-multiply
// intrinsic for both widths.
268 defm PMULvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b10011, "pmul",
269 int_arm_neon_vmulp, int_arm_neon_vmulp, 1>;
271 // Vector Multiply-accumulate and Multiply-subtract (Integer)
273 // class NeonI_3VSame_Constraint_impl: NeonI_3VSame with no data type and
274 // two operands constraints.
// Used for accumulating forms (MLA/MLS, FMLA/FMLS, BSL, UABA/SABA): the
// destination register is also a source, expressed via the "$src = $Rd"
// tied-operand constraint.
// NOTE(review): the flags line and closing "}" of the class body appear
// to be missing from this copy.
275 class NeonI_3VSame_Constraint_impl<string asmop, string asmlane,
276 RegisterOperand VPRC, ValueType OpTy, bit q, bit u, bits<2> size,
277 bits<5> opcode, SDPatternOperator opnode>
278 : NeonI_3VSame<q, u, size, opcode,
279 (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, VPRC:$Rm),
280 asmop # "\t$Rd" # asmlane # ", $Rn" # asmlane # ", $Rm" # asmlane,
281 [(set (OpTy VPRC:$Rd),
282 (OpTy (opnode (OpTy VPRC:$src), (OpTy VPRC:$Rn), (OpTy VPRC:$Rm))))],
284 let Constraints = "$src = $Rd";
// Multiply-accumulate / multiply-subtract expressed as add/sub of a mul.
287 def Neon_mla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
288 (add node:$Ra, (mul node:$Rn, node:$Rm))>;
290 def Neon_mls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
291 (sub node:$Ra, (mul node:$Rn, node:$Rm))>;
// MLA (u=0) and MLS (u=1) for all B/H/S lane arrangements; opcode 0b10010.
294 def MLAvvv_8B: NeonI_3VSame_Constraint_impl<"mla", ".8b", VPR64, v8i8,
295 0b0, 0b0, 0b00, 0b10010, Neon_mla>;
296 def MLAvvv_16B: NeonI_3VSame_Constraint_impl<"mla", ".16b", VPR128, v16i8,
297 0b1, 0b0, 0b00, 0b10010, Neon_mla>;
298 def MLAvvv_4H: NeonI_3VSame_Constraint_impl<"mla", ".4h", VPR64, v4i16,
299 0b0, 0b0, 0b01, 0b10010, Neon_mla>;
300 def MLAvvv_8H: NeonI_3VSame_Constraint_impl<"mla", ".8h", VPR128, v8i16,
301 0b1, 0b0, 0b01, 0b10010, Neon_mla>;
302 def MLAvvv_2S: NeonI_3VSame_Constraint_impl<"mla", ".2s", VPR64, v2i32,
303 0b0, 0b0, 0b10, 0b10010, Neon_mla>;
304 def MLAvvv_4S: NeonI_3VSame_Constraint_impl<"mla", ".4s", VPR128, v4i32,
305 0b1, 0b0, 0b10, 0b10010, Neon_mla>;
307 def MLSvvv_8B: NeonI_3VSame_Constraint_impl<"mls", ".8b", VPR64, v8i8,
308 0b0, 0b1, 0b00, 0b10010, Neon_mls>;
309 def MLSvvv_16B: NeonI_3VSame_Constraint_impl<"mls", ".16b", VPR128, v16i8,
310 0b1, 0b1, 0b00, 0b10010, Neon_mls>;
311 def MLSvvv_4H: NeonI_3VSame_Constraint_impl<"mls", ".4h", VPR64, v4i16,
312 0b0, 0b1, 0b01, 0b10010, Neon_mls>;
313 def MLSvvv_8H: NeonI_3VSame_Constraint_impl<"mls", ".8h", VPR128, v8i16,
314 0b1, 0b1, 0b01, 0b10010, Neon_mls>;
315 def MLSvvv_2S: NeonI_3VSame_Constraint_impl<"mls", ".2s", VPR64, v2i32,
316 0b0, 0b1, 0b10, 0b10010, Neon_mls>;
317 def MLSvvv_4S: NeonI_3VSame_Constraint_impl<"mls", ".4s", VPR128, v4i32,
318 0b1, 0b1, 0b10, 0b10010, Neon_mls>;
320 // Vector Multiply-accumulate and Multiply-subtract (Floating Point)
// fmul_su only matches when fusing is allowed, so these PatFrags fire only
// under the UseFusedMAC predicate below.
322 def Neon_fmla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
323 (fadd node:$Ra, (fmul_su node:$Rn, node:$Rm))>;
325 def Neon_fmls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
326 (fsub node:$Ra, (fmul_su node:$Rn, node:$Rm))>;
328 let Predicates = [HasNEON, UseFusedMAC] in {
329 def FMLAvvv_2S: NeonI_3VSame_Constraint_impl<"fmla", ".2s", VPR64, v2f32,
330 0b0, 0b0, 0b00, 0b11001, Neon_fmla>;
331 def FMLAvvv_4S: NeonI_3VSame_Constraint_impl<"fmla", ".4s", VPR128, v4f32,
332 0b1, 0b0, 0b00, 0b11001, Neon_fmla>;
333 def FMLAvvv_2D: NeonI_3VSame_Constraint_impl<"fmla", ".2d", VPR128, v2f64,
334 0b1, 0b0, 0b01, 0b11001, Neon_fmla>;
336 def FMLSvvv_2S: NeonI_3VSame_Constraint_impl<"fmls", ".2s", VPR64, v2f32,
337 0b0, 0b0, 0b10, 0b11001, Neon_fmls>;
338 def FMLSvvv_4S: NeonI_3VSame_Constraint_impl<"fmls", ".4s", VPR128, v4f32,
339 0b1, 0b0, 0b10, 0b11001, Neon_fmls>;
340 def FMLSvvv_2D: NeonI_3VSame_Constraint_impl<"fmls", ".2d", VPR128, v2f64,
341 0b1, 0b0, 0b11, 0b11001, Neon_fmls>;
344 // We're also allowed to match the fma instruction regardless of compile
// options: an explicit ISD::FMA node always maps to FMLA/FMLS. Note the
// operand order swap — fma(Rn, Rm, Ra) becomes FMLA(Ra, Rn, Rm) because
// the accumulator is the tied $src operand.
346 def : Pat<(v2f32 (fma VPR64:$Rn, VPR64:$Rm, VPR64:$Ra)),
347 (FMLAvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>;
348 def : Pat<(v4f32 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)),
349 (FMLAvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
350 def : Pat<(v2f64 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)),
351 (FMLAvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
// fma with a negated multiplicand selects the subtracting form.
353 def : Pat<(v2f32 (fma (fneg VPR64:$Rn), VPR64:$Rm, VPR64:$Ra)),
354 (FMLSvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>;
355 def : Pat<(v4f32 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)),
356 (FMLSvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
357 def : Pat<(v2f64 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)),
358 (FMLSvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
360 // Vector Divide (Floating-Point)
362 defm FDIVvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11111, "fdiv", fdiv, fdiv, fdiv,
363 v2f32, v4f32, v2f64, 0>;
365 // Vector Bitwise Operations
// AND/EOR/ORR/ORN/BIC all share opcode 0b00011; the "size" field (2nd
// template arg) selects which logical operation is performed.
367 // Vector Bitwise AND
369 defm ANDvvv : NeonI_3VSame_B_sizes<0b0, 0b00, 0b00011, "and", and, and, 1>;
371 // Vector Bitwise Exclusive OR
373 defm EORvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b00011, "eor", xor, xor, 1>;
377 defm ORRvvv : NeonI_3VSame_B_sizes<0b0, 0b10, 0b00011, "orr", or, or, 1>;
379 // ORR disassembled as MOV if Vn==Vm
381 // Vector Move - register
382 // Alias for ORR if Vn=Vm.
383 // FIXME: This is actually the preferred syntax but TableGen can't deal with
384 // custom printing of aliases.
385 def : NeonInstAlias<"mov $Rd.8b, $Rn.8b",
386 (ORRvvv_8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rn), 0>;
387 def : NeonInstAlias<"mov $Rd.16b, $Rn.16b",
388 (ORRvvv_16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rn), 0>;
390 // The MOVI instruction takes two immediate operands. The first is the
391 // immediate encoding, while the second is the cmode. A cmode of 14, or
392 // 0b1110, produces a MOVI operation, rather than a MVNI, ORR, or BIC.
393 def Neon_AllZero : PatFrag<(ops), (Neon_movi (i32 0), (i32 14))>;
394 def Neon_AllOne : PatFrag<(ops), (Neon_movi (i32 255), (i32 14))>;
// NOT expressed as XOR with all-ones; the bitconvert adapts the MOVI
// result type to whichever vector type the xor operates on.
396 def Neon_not8B : PatFrag<(ops node:$in),
397 (xor node:$in, (bitconvert (v8i8 Neon_AllOne)))>;
398 def Neon_not16B : PatFrag<(ops node:$in),
399 (xor node:$in, (bitconvert (v16i8 Neon_AllOne)))>;
// ORN = Rn | ~Rm, BIC = Rn & ~Rm, built from the NOT fragments above.
401 def Neon_orn8B : PatFrag<(ops node:$Rn, node:$Rm),
402 (or node:$Rn, (Neon_not8B node:$Rm))>;
404 def Neon_orn16B : PatFrag<(ops node:$Rn, node:$Rm),
405 (or node:$Rn, (Neon_not16B node:$Rm))>;
407 def Neon_bic8B : PatFrag<(ops node:$Rn, node:$Rm),
408 (and node:$Rn, (Neon_not8B node:$Rm))>;
410 def Neon_bic16B : PatFrag<(ops node:$Rn, node:$Rm),
411 (and node:$Rn, (Neon_not16B node:$Rm))>;
414 // Vector Bitwise OR NOT - register
416 defm ORNvvv : NeonI_3VSame_B_sizes<0b0, 0b11, 0b00011, "orn",
417 Neon_orn8B, Neon_orn16B, 0>;
419 // Vector Bitwise Bit Clear (AND NOT) - register
421 defm BICvvv : NeonI_3VSame_B_sizes<0b0, 0b01, 0b00011, "bic",
422 Neon_bic8B, Neon_bic16B, 0>;
// The bitwise instructions above are only defined for the byte-lane types;
// this multiclass maps the remaining 64-bit (v2i32/v4i16/v1i64) and 128-bit
// (v4i32/v8i16/v2i64) types onto the same _8B/_16B instructions, since the
// operations are lane-size-agnostic.
// NOTE(review): the closing "}" of the multiclass body appears to be
// missing from this copy.
424 multiclass Neon_bitwise2V_patterns<SDPatternOperator opnode8B,
425 SDPatternOperator opnode16B,
427 Instruction INST16B> {
428 def : Pat<(v2i32 (opnode8B VPR64:$Rn, VPR64:$Rm)),
429 (INST8B VPR64:$Rn, VPR64:$Rm)>;
430 def : Pat<(v4i16 (opnode8B VPR64:$Rn, VPR64:$Rm)),
431 (INST8B VPR64:$Rn, VPR64:$Rm)>;
432 def : Pat<(v1i64 (opnode8B VPR64:$Rn, VPR64:$Rm)),
433 (INST8B VPR64:$Rn, VPR64:$Rm)>;
434 def : Pat<(v4i32 (opnode16B VPR128:$Rn, VPR128:$Rm)),
435 (INST16B VPR128:$Rn, VPR128:$Rm)>;
436 def : Pat<(v8i16 (opnode16B VPR128:$Rn, VPR128:$Rm)),
437 (INST16B VPR128:$Rn, VPR128:$Rm)>;
438 def : Pat<(v2i64 (opnode16B VPR128:$Rn, VPR128:$Rm)),
439 (INST16B VPR128:$Rn, VPR128:$Rm)>;
442 // Additional patterns for bitwise instructions AND, EOR, ORR, BIC, ORN
443 defm : Neon_bitwise2V_patterns<and, and, ANDvvv_8B, ANDvvv_16B>;
444 defm : Neon_bitwise2V_patterns<or, or, ORRvvv_8B, ORRvvv_16B>;
445 defm : Neon_bitwise2V_patterns<xor, xor, EORvvv_8B, EORvvv_16B>;
446 defm : Neon_bitwise2V_patterns<Neon_bic8B, Neon_bic16B, BICvvv_8B, BICvvv_16B>;
447 defm : Neon_bitwise2V_patterns<Neon_orn8B, Neon_orn16B, ORNvvv_8B, ORNvvv_16B>;
449 // Vector Bitwise Select
// BSL: Rd = (Rd & Rn) | (~Rd & Rm), matched from vselect.
450 def BSLvvv_8B : NeonI_3VSame_Constraint_impl<"bsl", ".8b", VPR64, v8i8,
451 0b0, 0b1, 0b01, 0b00011, vselect>;
453 def BSLvvv_16B : NeonI_3VSame_Constraint_impl<"bsl", ".16b", VPR128, v128,
454 0b1, 0b1, 0b01, 0b00011, vselect>;
// Maps the three-operand select onto BSL for every 64-bit and 128-bit
// vector type, plus explicit or/and/not expansions and the llvm.arm.neon
// vbsl intrinsic.
// NOTE(review): the declared parameter list at line 457 (INST8B) appears
// to be missing from this copy, as does the multiclass's closing "}".
456 multiclass Neon_bitwise3V_patterns<SDPatternOperator opnode,
458 Instruction INST16B> {
459 // Disassociate type from instruction definition
460 def : Pat<(v8i8 (opnode (v8i8 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
461 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
462 def : Pat<(v2i32 (opnode (v2i32 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
463 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
464 def : Pat<(v4i16 (opnode (v4i16 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
465 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
466 def : Pat<(v1i64 (opnode (v1i64 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
467 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
468 def : Pat<(v16i8 (opnode (v16i8 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
469 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
470 def : Pat<(v4i32 (opnode (v4i32 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
471 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
472 def : Pat<(v8i16 (opnode (v8i16 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
473 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
474 def : Pat<(v2i64 (opnode (v2i64 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
475 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
477 // Allow to match BSL instruction pattern with non-constant operand
// These match the expanded form (Rn & Rd) | (Rm & ~Rd) directly.
478 def : Pat<(v8i8 (or (and VPR64:$Rn, VPR64:$Rd),
479 (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
480 (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
481 def : Pat<(v4i16 (or (and VPR64:$Rn, VPR64:$Rd),
482 (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
483 (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
484 def : Pat<(v2i32 (or (and VPR64:$Rn, VPR64:$Rd),
485 (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
486 (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
487 def : Pat<(v1i64 (or (and VPR64:$Rn, VPR64:$Rd),
488 (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
489 (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
490 def : Pat<(v16i8 (or (and VPR128:$Rn, VPR128:$Rd),
491 (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
492 (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
493 def : Pat<(v8i16 (or (and VPR128:$Rn, VPR128:$Rd),
494 (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
495 (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
496 def : Pat<(v4i32 (or (and VPR128:$Rn, VPR128:$Rd),
497 (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
498 (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
499 def : Pat<(v2i64 (or (and VPR128:$Rn, VPR128:$Rd),
500 (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
501 (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
503 // Allow to match llvm.arm.* intrinsics.
504 def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 VPR64:$src),
505 (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))),
506 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
507 def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 VPR64:$src),
508 (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))),
509 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
510 def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 VPR64:$src),
511 (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))),
512 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
513 def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 VPR64:$src),
514 (v1i64 VPR64:$Rn), (v1i64 VPR64:$Rm))),
515 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
516 def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 VPR64:$src),
517 (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))),
518 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
519 def : Pat<(v1f64 (int_arm_neon_vbsl (v1f64 VPR64:$src),
520 (v1f64 VPR64:$Rn), (v1f64 VPR64:$Rm))),
521 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
522 def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 VPR128:$src),
523 (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))),
524 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
525 def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 VPR128:$src),
526 (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))),
527 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
528 def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 VPR128:$src),
529 (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))),
530 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
531 def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 VPR128:$src),
532 (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))),
533 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
534 def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 VPR128:$src),
535 (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))),
536 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
537 def : Pat<(v2f64 (int_arm_neon_vbsl (v2f64 VPR128:$src),
538 (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))),
539 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
542 // Additional patterns for bitwise instruction BSL
543 defm: Neon_bitwise3V_patterns<vselect, BSLvvv_8B, BSLvvv_16B>;
// A PatFrag that can never match (its predicate always returns false).
// BIT/BIF need a pattern operator to instantiate NeonI_3VSame_Constraint_impl
// but must not be selected from DAG patterns — only BSL is; BIT/BIF exist
// for assembly/disassembly.
545 def Neon_NoBSLop : PatFrag<(ops node:$src, node:$Rn, node:$Rm),
546 (vselect node:$src, node:$Rn, node:$Rm),
547 [{ (void)N; return false; }]>;
549 // Vector Bitwise Insert if True
551 def BITvvv_8B : NeonI_3VSame_Constraint_impl<"bit", ".8b", VPR64, v8i8,
552 0b0, 0b1, 0b10, 0b00011, Neon_NoBSLop>;
553 def BITvvv_16B : NeonI_3VSame_Constraint_impl<"bit", ".16b", VPR128, v16i8,
554 0b1, 0b1, 0b10, 0b00011, Neon_NoBSLop>;
556 // Vector Bitwise Insert if False
558 def BIFvvv_8B : NeonI_3VSame_Constraint_impl<"bif", ".8b", VPR64, v8i8,
559 0b0, 0b1, 0b11, 0b00011, Neon_NoBSLop>;
560 def BIFvvv_16B : NeonI_3VSame_Constraint_impl<"bif", ".16b", VPR128, v16i8,
561 0b1, 0b1, 0b11, 0b00011, Neon_NoBSLop>;
563 // Vector Absolute Difference and Accumulate (Signed, Unsigned)
// ABA = accumulator + abs-difference, expressed via the ARM NEON vabd
// intrinsics; u bit distinguishes UABA (u=1) from SABA (u=0).
565 def Neon_uaba : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
566 (add node:$Ra, (int_arm_neon_vabdu node:$Rn, node:$Rm))>;
567 def Neon_saba : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
568 (add node:$Ra, (int_arm_neon_vabds node:$Rn, node:$Rm))>;
570 // Vector Absolute Difference and Accumulate (Unsigned)
571 def UABAvvv_8B : NeonI_3VSame_Constraint_impl<"uaba", ".8b", VPR64, v8i8,
572 0b0, 0b1, 0b00, 0b01111, Neon_uaba>;
573 def UABAvvv_16B : NeonI_3VSame_Constraint_impl<"uaba", ".16b", VPR128, v16i8,
574 0b1, 0b1, 0b00, 0b01111, Neon_uaba>;
575 def UABAvvv_4H : NeonI_3VSame_Constraint_impl<"uaba", ".4h", VPR64, v4i16,
576 0b0, 0b1, 0b01, 0b01111, Neon_uaba>;
577 def UABAvvv_8H : NeonI_3VSame_Constraint_impl<"uaba", ".8h", VPR128, v8i16,
578 0b1, 0b1, 0b01, 0b01111, Neon_uaba>;
579 def UABAvvv_2S : NeonI_3VSame_Constraint_impl<"uaba", ".2s", VPR64, v2i32,
580 0b0, 0b1, 0b10, 0b01111, Neon_uaba>;
581 def UABAvvv_4S : NeonI_3VSame_Constraint_impl<"uaba", ".4s", VPR128, v4i32,
582 0b1, 0b1, 0b10, 0b01111, Neon_uaba>;
584 // Vector Absolute Difference and Accumulate (Signed)
585 def SABAvvv_8B : NeonI_3VSame_Constraint_impl<"saba", ".8b", VPR64, v8i8,
586 0b0, 0b0, 0b00, 0b01111, Neon_saba>;
587 def SABAvvv_16B : NeonI_3VSame_Constraint_impl<"saba", ".16b", VPR128, v16i8,
588 0b1, 0b0, 0b00, 0b01111, Neon_saba>;
589 def SABAvvv_4H : NeonI_3VSame_Constraint_impl<"saba", ".4h", VPR64, v4i16,
590 0b0, 0b0, 0b01, 0b01111, Neon_saba>;
591 def SABAvvv_8H : NeonI_3VSame_Constraint_impl<"saba", ".8h", VPR128, v8i16,
592 0b1, 0b0, 0b01, 0b01111, Neon_saba>;
593 def SABAvvv_2S : NeonI_3VSame_Constraint_impl<"saba", ".2s", VPR64, v2i32,
594 0b0, 0b0, 0b10, 0b01111, Neon_saba>;
595 def SABAvvv_4S : NeonI_3VSame_Constraint_impl<"saba", ".4s", VPR128, v4i32,
596 0b1, 0b0, 0b10, 0b01111, Neon_saba>;
599 // Vector Absolute Difference (Signed, Unsigned)
600 defm UABDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01110, "uabd", int_arm_neon_vabdu, 0>;
601 defm SABDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01110, "sabd", int_arm_neon_vabds, 0>;
603 // Vector Absolute Difference (Floating Point)
// FABD reuses int_arm_neon_vabds for all three FP arrangements; the u/size
// bits and FP result types distinguish it from the integer SABD above.
604 defm FABDvvv: NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11010, "fabd",
605 int_arm_neon_vabds, int_arm_neon_vabds,
606 int_arm_neon_vabds, v2f32, v4f32, v2f64, 0>;
608 // Vector Reciprocal Step (Floating Point)
// NOTE(review): the third opnode argument line (original line 611) appears
// to be missing from this copy.
609 defm FRECPSvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11111, "frecps",
610 int_arm_neon_vrecps, int_arm_neon_vrecps,
612 v2f32, v4f32, v2f64, 0>;
614 // Vector Reciprocal Square Root Step (Floating Point)
615 defm FRSQRTSvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11111, "frsqrts",
616 int_arm_neon_vrsqrts,
617 int_arm_neon_vrsqrts,
618 int_arm_neon_vrsqrts,
619 v2f32, v4f32, v2f64, 0>;
621 // Vector Comparisons
// Bind the generic Neon_cmp node to each concrete condition code, giving
// one PatFrag per compare instruction (EQ, unsigned GE/GT, signed GE/GT).
623 def Neon_cmeq : PatFrag<(ops node:$lhs, node:$rhs),
624 (Neon_cmp node:$lhs, node:$rhs, SETEQ)>;
625 def Neon_cmphs : PatFrag<(ops node:$lhs, node:$rhs),
626 (Neon_cmp node:$lhs, node:$rhs, SETUGE)>;
627 def Neon_cmge : PatFrag<(ops node:$lhs, node:$rhs),
628 (Neon_cmp node:$lhs, node:$rhs, SETGE)>;
629 def Neon_cmhi : PatFrag<(ops node:$lhs, node:$rhs),
630 (Neon_cmp node:$lhs, node:$rhs, SETUGT)>;
631 def Neon_cmgt : PatFrag<(ops node:$lhs, node:$rhs),
632 (Neon_cmp node:$lhs, node:$rhs, SETGT)>;
634 // NeonI_compare_aliases class: swaps register operands to implement
635 // comparison aliases, e.g., CMLE is alias for CMGE with operands reversed.
// Note the alias maps (Rd, Rn, Rm) in the asm string onto (Rd, Rm, Rn) of
// the underlying instruction — that operand swap IS the aliasing trick.
// NOTE(review): the asm-string continuation line (original 639) appears to
// be missing from this copy.
636 class NeonI_compare_aliases<string asmop, string asmlane,
637 Instruction inst, RegisterOperand VPRC>
638 : NeonInstAlias<asmop # "\t$Rd" # asmlane #", $Rn" # asmlane #
640 (inst VPRC:$Rd, VPRC:$Rm, VPRC:$Rn), 0b0>;
642 // Vector Comparisons (Integer)
644 // Vector Compare Mask Equal (Integer)
// NOTE(review): the closing "}" of this isCommutable block (original line
// 647) appears to be missing from this copy.
645 let isCommutable =1 in {
646 defm CMEQvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b10001, "cmeq", Neon_cmeq, 0>;
649 // Vector Compare Mask Higher or Same (Unsigned Integer)
650 defm CMHSvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00111, "cmhs", Neon_cmphs, 0>;
652 // Vector Compare Mask Greater Than or Equal (Integer)
653 defm CMGEvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00111, "cmge", Neon_cmge, 0>;
655 // Vector Compare Mask Higher (Unsigned Integer)
656 defm CMHIvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00110, "cmhi", Neon_cmhi, 0>;
658 // Vector Compare Mask Greater Than (Integer)
659 defm CMGTvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00110, "cmgt", Neon_cmgt, 0>;
661 // Vector Compare Mask Bitwise Test (Integer)
662 defm CMTSTvvv: NeonI_3VSame_BHSD_sizes<0b0, 0b10001, "cmtst", Neon_tst, 0>;
664 // Vector Compare Mask Less or Same (Unsigned Integer)
665 // CMLS is alias for CMHS with operands reversed.
666 def CMLSvvv_8B : NeonI_compare_aliases<"cmls", ".8b", CMHSvvv_8B, VPR64>;
667 def CMLSvvv_16B : NeonI_compare_aliases<"cmls", ".16b", CMHSvvv_16B, VPR128>;
668 def CMLSvvv_4H : NeonI_compare_aliases<"cmls", ".4h", CMHSvvv_4H, VPR64>;
669 def CMLSvvv_8H : NeonI_compare_aliases<"cmls", ".8h", CMHSvvv_8H, VPR128>;
670 def CMLSvvv_2S : NeonI_compare_aliases<"cmls", ".2s", CMHSvvv_2S, VPR64>;
671 def CMLSvvv_4S : NeonI_compare_aliases<"cmls", ".4s", CMHSvvv_4S, VPR128>;
672 def CMLSvvv_2D : NeonI_compare_aliases<"cmls", ".2d", CMHSvvv_2D, VPR128>;
674 // Vector Compare Mask Less Than or Equal (Integer)
675 // CMLE is alias for CMGE with operands reversed.
676 def CMLEvvv_8B : NeonI_compare_aliases<"cmle", ".8b", CMGEvvv_8B, VPR64>;
677 def CMLEvvv_16B : NeonI_compare_aliases<"cmle", ".16b", CMGEvvv_16B, VPR128>;
678 def CMLEvvv_4H : NeonI_compare_aliases<"cmle", ".4h", CMGEvvv_4H, VPR64>;
679 def CMLEvvv_8H : NeonI_compare_aliases<"cmle", ".8h", CMGEvvv_8H, VPR128>;
680 def CMLEvvv_2S : NeonI_compare_aliases<"cmle", ".2s", CMGEvvv_2S, VPR64>;
681 def CMLEvvv_4S : NeonI_compare_aliases<"cmle", ".4s", CMGEvvv_4S, VPR128>;
682 def CMLEvvv_2D : NeonI_compare_aliases<"cmle", ".2d", CMGEvvv_2D, VPR128>;
684 // Vector Compare Mask Lower (Unsigned Integer)
685 // CMLO is alias for CMHI with operands reversed.
686 def CMLOvvv_8B : NeonI_compare_aliases<"cmlo", ".8b", CMHIvvv_8B, VPR64>;
687 def CMLOvvv_16B : NeonI_compare_aliases<"cmlo", ".16b", CMHIvvv_16B, VPR128>;
688 def CMLOvvv_4H : NeonI_compare_aliases<"cmlo", ".4h", CMHIvvv_4H, VPR64>;
689 def CMLOvvv_8H : NeonI_compare_aliases<"cmlo", ".8h", CMHIvvv_8H, VPR128>;
690 def CMLOvvv_2S : NeonI_compare_aliases<"cmlo", ".2s", CMHIvvv_2S, VPR64>;
691 def CMLOvvv_4S : NeonI_compare_aliases<"cmlo", ".4s", CMHIvvv_4S, VPR128>;
692 def CMLOvvv_2D : NeonI_compare_aliases<"cmlo", ".2d", CMHIvvv_2D, VPR128>;
694 // Vector Compare Mask Less Than (Integer)
695 // CMLT is alias for CMGT with operands reversed.
696 def CMLTvvv_8B : NeonI_compare_aliases<"cmlt", ".8b", CMGTvvv_8B, VPR64>;
697 def CMLTvvv_16B : NeonI_compare_aliases<"cmlt", ".16b", CMGTvvv_16B, VPR128>;
698 def CMLTvvv_4H : NeonI_compare_aliases<"cmlt", ".4h", CMGTvvv_4H, VPR64>;
699 def CMLTvvv_8H : NeonI_compare_aliases<"cmlt", ".8h", CMGTvvv_8H, VPR128>;
700 def CMLTvvv_2S : NeonI_compare_aliases<"cmlt", ".2s", CMGTvvv_2S, VPR64>;
701 def CMLTvvv_4S : NeonI_compare_aliases<"cmlt", ".4s", CMGTvvv_4S, VPR128>;
702 def CMLTvvv_2D : NeonI_compare_aliases<"cmlt", ".2d", CMGTvvv_2D, VPR128>;
// Assembly operand for the literal "#0" immediate used by compare-with-zero
// instructions; only the value 0 is accepted.
// NOTE(review): the opening "{" of the AsmOperandClass body (original line
// 706/707) appears to be missing from this copy.
705 def neon_uimm0_asmoperand : AsmOperandClass
708 let PredicateMethod = "isUImm<0>";
709 let RenderMethod = "addImmOperands";
712 def neon_uimm0 : Operand<i32>, ImmLeaf<i32, [{return Imm == 0;}]> {
713 let ParserMatchClass = neon_uimm0_asmoperand;
714 let PrintMethod = "printNeonUImm0Operand";
// Compare-against-zero instructions (CMEQ/CMGE/CMGT/CMLE/CMLT with #0):
// two-register misc encoding, one variant per lane arrangement, matching
// Neon_cmpz with the given condition code. Note there is no _1D variant —
// only the 128-bit _2D form exists for doubleword lanes.
// NOTE(review): each def's flags/closing lines appear to be missing from
// this copy, as is the multiclass's opening "{" on the header line.
718 multiclass NeonI_cmpz_sizes<bit u, bits<5> opcode, string asmop, CondCode CC>
720 def _8B : NeonI_2VMisc<0b0, u, 0b00, opcode,
721 (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
722 asmop # "\t$Rd.8b, $Rn.8b, $Imm",
723 [(set (v8i8 VPR64:$Rd),
724 (v8i8 (Neon_cmpz (v8i8 VPR64:$Rn), (i32 imm:$Imm), CC)))],
727 def _16B : NeonI_2VMisc<0b1, u, 0b00, opcode,
728 (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
729 asmop # "\t$Rd.16b, $Rn.16b, $Imm",
730 [(set (v16i8 VPR128:$Rd),
731 (v16i8 (Neon_cmpz (v16i8 VPR128:$Rn), (i32 imm:$Imm), CC)))],
734 def _4H : NeonI_2VMisc<0b0, u, 0b01, opcode,
735 (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
736 asmop # "\t$Rd.4h, $Rn.4h, $Imm",
737 [(set (v4i16 VPR64:$Rd),
738 (v4i16 (Neon_cmpz (v4i16 VPR64:$Rn), (i32 imm:$Imm), CC)))],
741 def _8H : NeonI_2VMisc<0b1, u, 0b01, opcode,
742 (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
743 asmop # "\t$Rd.8h, $Rn.8h, $Imm",
744 [(set (v8i16 VPR128:$Rd),
745 (v8i16 (Neon_cmpz (v8i16 VPR128:$Rn), (i32 imm:$Imm), CC)))],
748 def _2S : NeonI_2VMisc<0b0, u, 0b10, opcode,
749 (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
750 asmop # "\t$Rd.2s, $Rn.2s, $Imm",
751 [(set (v2i32 VPR64:$Rd),
752 (v2i32 (Neon_cmpz (v2i32 VPR64:$Rn), (i32 imm:$Imm), CC)))],
755 def _4S : NeonI_2VMisc<0b1, u, 0b10, opcode,
756 (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
757 asmop # "\t$Rd.4s, $Rn.4s, $Imm",
758 [(set (v4i32 VPR128:$Rd),
759 (v4i32 (Neon_cmpz (v4i32 VPR128:$Rn), (i32 imm:$Imm), CC)))],
762 def _2D : NeonI_2VMisc<0b1, u, 0b11, opcode,
763 (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
764 asmop # "\t$Rd.2d, $Rn.2d, $Imm",
765 [(set (v2i64 VPR128:$Rd),
766 (v2i64 (Neon_cmpz (v2i64 VPR128:$Rn), (i32 imm:$Imm), CC)))],
770 // Vector Compare Mask Equal to Zero (Integer)
771 defm CMEQvvi : NeonI_cmpz_sizes<0b0, 0b01001, "cmeq", SETEQ>;
773 // Vector Compare Mask Greater Than or Equal to Zero (Signed Integer)
774 defm CMGEvvi : NeonI_cmpz_sizes<0b1, 0b01000, "cmge", SETGE>;
776 // Vector Compare Mask Greater Than Zero (Signed Integer)
777 defm CMGTvvi : NeonI_cmpz_sizes<0b0, 0b01000, "cmgt", SETGT>;
779 // Vector Compare Mask Less Than or Equal To Zero (Signed Integer)
780 defm CMLEvvi : NeonI_cmpz_sizes<0b1, 0b01001, "cmle", SETLE>;
782 // Vector Compare Mask Less Than Zero (Signed Integer)
783 defm CMLTvvi : NeonI_cmpz_sizes<0b0, 0b01010, "cmlt", SETLT>;
785 // Vector Comparisons (Floating Point)
787 // Vector Compare Mask Equal (Floating Point)
// FCMEQ is commutative (a == b  <=>  b == a), so mark it for the matcher.
788 let isCommutable =1 in {
789 defm FCMEQvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11100, "fcmeq", Neon_cmeq,
790 Neon_cmeq, Neon_cmeq,
791 v2i32, v4i32, v2i64, 0>;
// FCMGE/FCMGT are ordered comparisons and therefore NOT commutative.
794 // Vector Compare Mask Greater Than Or Equal (Floating Point)
795 defm FCMGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11100, "fcmge", Neon_cmge,
796 Neon_cmge, Neon_cmge,
797 v2i32, v4i32, v2i64, 0>;
799 // Vector Compare Mask Greater Than (Floating Point)
800 defm FCMGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11100, "fcmgt", Neon_cmgt,
801 Neon_cmgt, Neon_cmgt,
802 v2i32, v4i32, v2i64, 0>;
804 // Vector Compare Mask Less Than Or Equal (Floating Point)
805 // FCMLE is alias for FCMGE with operands reversed.
806 def FCMLEvvv_2S : NeonI_compare_aliases<"fcmle", ".2s", FCMGEvvv_2S, VPR64>;
807 def FCMLEvvv_4S : NeonI_compare_aliases<"fcmle", ".4s", FCMGEvvv_4S, VPR128>;
808 def FCMLEvvv_2D : NeonI_compare_aliases<"fcmle", ".2d", FCMGEvvv_2D, VPR128>;
810 // Vector Compare Mask Less Than (Floating Point)
811 // FCMLT is alias for FCMGT with operands reversed.
812 def FCMLTvvv_2S : NeonI_compare_aliases<"fcmlt", ".2s", FCMGTvvv_2S, VPR64>;
813 def FCMLTvvv_4S : NeonI_compare_aliases<"fcmlt", ".4s", FCMGTvvv_4S, VPR128>;
814 def FCMLTvvv_2D : NeonI_compare_aliases<"fcmlt", ".2d", FCMGTvvv_2D, VPR128>;
// Floating-point compare against #0.0: 2S/4S/2D arrangements only.  The
// immediate is the mandatory literal 0.0 (fpz32); the result element type is
// the same-width integer mask vector.
817 multiclass NeonI_fpcmpz_sizes<bit u, bit size, bits<5> opcode,
818 string asmop, CondCode CC>
820 def _2S : NeonI_2VMisc<0b0, u, {size, 0b0}, opcode,
821 (outs VPR64:$Rd), (ins VPR64:$Rn, fpz32:$FPImm),
822 asmop # "\t$Rd.2s, $Rn.2s, $FPImm",
823 [(set (v2i32 VPR64:$Rd),
824 (v2i32 (Neon_cmpz (v2f32 VPR64:$Rn), (f32 fpz32:$FPImm), CC)))],
827 def _4S : NeonI_2VMisc<0b1, u, {size, 0b0}, opcode,
828 (outs VPR128:$Rd), (ins VPR128:$Rn, fpz32:$FPImm),
829 asmop # "\t$Rd.4s, $Rn.4s, $FPImm",
830 [(set (v4i32 VPR128:$Rd),
831 (v4i32 (Neon_cmpz (v4f32 VPR128:$Rn), (f32 fpz32:$FPImm), CC)))],
834 def _2D : NeonI_2VMisc<0b1, u, {size, 0b1}, opcode,
835 (outs VPR128:$Rd), (ins VPR128:$Rn, fpz32:$FPImm),
836 asmop # "\t$Rd.2d, $Rn.2d, $FPImm",
837 [(set (v2i64 VPR128:$Rd),
838 (v2i64 (Neon_cmpz (v2f64 VPR128:$Rn), (f32 fpz32:$FPImm), CC)))],
842 // Vector Compare Mask Equal to Zero (Floating Point)
843 defm FCMEQvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01101, "fcmeq", SETEQ>;
845 // Vector Compare Mask Greater Than or Equal to Zero (Floating Point)
846 defm FCMGEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01100, "fcmge", SETGE>;
848 // Vector Compare Mask Greater Than Zero (Floating Point)
849 defm FCMGTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01100, "fcmgt", SETGT>;
851 // Vector Compare Mask Less Than or Equal To Zero (Floating Point)
852 defm FCMLEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01101, "fcmle", SETLE>;
854 // Vector Compare Mask Less Than Zero (Floating Point)
855 defm FCMLTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01110, "fcmlt", SETLT>;
857 // Vector Absolute Comparisons (Floating Point)
// FACGE/FACGT compare |a| against |b|; selected via the ARM/AArch64 vacge /
// vacgt intrinsics.
859 // Vector Absolute Compare Mask Greater Than Or Equal (Floating Point)
860 defm FACGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11101, "facge",
861 int_arm_neon_vacged, int_arm_neon_vacgeq,
862 int_aarch64_neon_vacgeq,
863 v2i32, v4i32, v2i64, 0>;
865 // Vector Absolute Compare Mask Greater Than (Floating Point)
866 defm FACGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11101, "facgt",
867 int_arm_neon_vacgtd, int_arm_neon_vacgtq,
868 int_aarch64_neon_vacgtq,
869 v2i32, v4i32, v2i64, 0>;
871 // Vector Absolute Compare Mask Less Than Or Equal (Floating Point)
872 // FACLE is alias for FACGE with operands reversed.
873 def FACLEvvv_2S : NeonI_compare_aliases<"facle", ".2s", FACGEvvv_2S, VPR64>;
874 def FACLEvvv_4S : NeonI_compare_aliases<"facle", ".4s", FACGEvvv_4S, VPR128>;
875 def FACLEvvv_2D : NeonI_compare_aliases<"facle", ".2d", FACGEvvv_2D, VPR128>;
877 // Vector Absolute Compare Mask Less Than (Floating Point)
878 // FACLT is alias for FACGT with operands reversed.
879 def FACLTvvv_2S : NeonI_compare_aliases<"faclt", ".2s", FACGTvvv_2S, VPR64>;
880 def FACLTvvv_4S : NeonI_compare_aliases<"faclt", ".4s", FACGTvvv_4S, VPR128>;
881 def FACLTvvv_2D : NeonI_compare_aliases<"faclt", ".2d", FACGTvvv_2D, VPR128>;
// Three-register same-arrangement integer/FP arithmetic.  The trailing 0/1
// literal in each defm is the isCommutable flag passed to the multiclass.
883 // Vector halving add (Integer Signed, Unsigned)
884 defm SHADDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00000, "shadd",
885 int_arm_neon_vhadds, 1>;
886 defm UHADDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00000, "uhadd",
887 int_arm_neon_vhaddu, 1>;
889 // Vector halving sub (Integer Signed, Unsigned)
890 defm SHSUBvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00100, "shsub",
891 int_arm_neon_vhsubs, 0>;
892 defm UHSUBvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00100, "uhsub",
893 int_arm_neon_vhsubu, 0>;
895 // Vector rounding halving add (Integer Signed, Unsigned)
896 defm SRHADDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00010, "srhadd",
897 int_arm_neon_vrhadds, 1>;
898 defm URHADDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00010, "urhadd",
899 int_arm_neon_vrhaddu, 1>;
901 // Vector Saturating add (Integer Signed, Unsigned)
902 defm SQADDvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00001, "sqadd",
903 int_arm_neon_vqadds, 1>;
904 defm UQADDvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00001, "uqadd",
905 int_arm_neon_vqaddu, 1>;
907 // Vector Saturating sub (Integer Signed, Unsigned)
908 defm SQSUBvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00101, "sqsub",
909 int_arm_neon_vqsubs, 1>;
910 defm UQSUBvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00101, "uqsub",
911 int_arm_neon_vqsubu, 1>;
913 // Vector Shift Left (Signed and Unsigned Integer)
914 defm SSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01000, "sshl",
915 int_arm_neon_vshifts, 1>;
916 defm USHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01000, "ushl",
917 int_arm_neon_vshiftu, 1>;
919 // Vector Saturating Shift Left (Signed and Unsigned Integer)
920 defm SQSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01001, "sqshl",
921 int_arm_neon_vqshifts, 1>;
922 defm UQSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01001, "uqshl",
923 int_arm_neon_vqshiftu, 1>;
925 // Vector Rounding Shift Left (Signed and Unsigned Integer)
926 defm SRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01010, "srshl",
927 int_arm_neon_vrshifts, 1>;
928 defm URSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01010, "urshl",
929 int_arm_neon_vrshiftu, 1>;
931 // Vector Saturating Rounding Shift Left (Signed and Unsigned Integer)
932 defm SQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01011, "sqrshl",
933 int_arm_neon_vqrshifts, 1>;
934 defm UQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01011, "uqrshl",
935 int_arm_neon_vqrshiftu, 1>;
937 // Vector Maximum (Signed and Unsigned Integer)
938 defm SMAXvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01100, "smax", int_arm_neon_vmaxs, 1>;
939 defm UMAXvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01100, "umax", int_arm_neon_vmaxu, 1>;
941 // Vector Minimum (Signed and Unsigned Integer)
942 defm SMINvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01101, "smin", int_arm_neon_vmins, 1>;
943 defm UMINvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01101, "umin", int_arm_neon_vminu, 1>;
945 // Vector Maximum (Floating Point)
946 defm FMAXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11110, "fmax",
947 int_arm_neon_vmaxs, int_arm_neon_vmaxs,
948 int_arm_neon_vmaxs, v2f32, v4f32, v2f64, 1>;
950 // Vector Minimum (Floating Point)
951 defm FMINvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11110, "fmin",
952 int_arm_neon_vmins, int_arm_neon_vmins,
953 int_arm_neon_vmins, v2f32, v4f32, v2f64, 1>;
955 // Vector maxNum (Floating Point) - prefer a number over a quiet NaN
956 defm FMAXNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11000, "fmaxnm",
957 int_aarch64_neon_vmaxnm,
958 int_aarch64_neon_vmaxnm,
959 int_aarch64_neon_vmaxnm,
960 v2f32, v4f32, v2f64, 1>;
962 // Vector minNum (Floating Point) - prefer a number over a quiet NaN
963 defm FMINNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11000, "fminnm",
964 int_aarch64_neon_vminnm,
965 int_aarch64_neon_vminnm,
966 int_aarch64_neon_vminnm,
967 v2f32, v4f32, v2f64, 1>;
// Pairwise operations: each result lane combines two adjacent source lanes.
969 // Vector Maximum Pairwise (Signed and Unsigned Integer)
970 defm SMAXPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10100, "smaxp", int_arm_neon_vpmaxs, 1>;
971 defm UMAXPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10100, "umaxp", int_arm_neon_vpmaxu, 1>;
973 // Vector Minimum Pairwise (Signed and Unsigned Integer)
974 defm SMINPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10101, "sminp", int_arm_neon_vpmins, 1>;
975 defm UMINPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10101, "uminp", int_arm_neon_vpminu, 1>;
977 // Vector Maximum Pairwise (Floating Point)
978 defm FMAXPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11110, "fmaxp",
979 int_arm_neon_vpmaxs, int_arm_neon_vpmaxs,
980 int_arm_neon_vpmaxs, v2f32, v4f32, v2f64, 1>;
982 // Vector Minimum Pairwise (Floating Point)
983 defm FMINPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11110, "fminp",
984 int_arm_neon_vpmins, int_arm_neon_vpmins,
985 int_arm_neon_vpmins, v2f32, v4f32, v2f64, 1>;
987 // Vector maxNum Pairwise (Floating Point) - prefer a number over a quiet NaN
988 defm FMAXNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11000, "fmaxnmp",
989 int_aarch64_neon_vpmaxnm,
990 int_aarch64_neon_vpmaxnm,
991 int_aarch64_neon_vpmaxnm,
992 v2f32, v4f32, v2f64, 1>;
994 // Vector minNum Pairwise (Floating Point) - prefer a number over a quiet NaN
995 defm FMINNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11000, "fminnmp",
996 int_aarch64_neon_vpminnm,
997 int_aarch64_neon_vpminnm,
998 int_aarch64_neon_vpminnm,
999 v2f32, v4f32, v2f64, 1>;
1001 // Vector Addition Pairwise (Integer)
1002 defm ADDP : NeonI_3VSame_BHSD_sizes<0b0, 0b10111, "addp", int_arm_neon_vpadd, 1>;
1004 // Vector Addition Pairwise (Floating Point)
// NOTE(review): FADDP's three intrinsic arguments (original lines 1006-1008)
// are missing from this copy of the file — restore from upstream.
1005 defm FADDP : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11010, "faddp",
1009 v2f32, v4f32, v2f64, 1>;
1011 // Vector Saturating Doubling Multiply High
1012 defm SQDMULHvvv : NeonI_3VSame_HS_sizes<0b0, 0b10110, "sqdmulh",
1013 int_arm_neon_vqdmulh, 1>;
1015 // Vector Saturating Rounding Doubling Multiply High
1016 defm SQRDMULHvvv : NeonI_3VSame_HS_sizes<0b1, 0b10110, "sqrdmulh",
1017 int_arm_neon_vqrdmulh, 1>;
1019 // Vector Multiply Extended (Floating Point)
1020 defm FMULXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11011, "fmulx",
1021 int_aarch64_neon_vmulx,
1022 int_aarch64_neon_vmulx,
1023 int_aarch64_neon_vmulx,
1024 v2f32, v4f32, v2f64, 1>;
// Map across-vector reductions on v2i32 (which return a scalar v1i32) onto
// the corresponding pairwise instruction applied to the vector twice.
1026 // Patterns to match llvm.aarch64.* intrinsic for
1027 // ADDP, SMINP, UMINP, SMAXP, UMAXP having i32 as output
1028 class Neon_VectorPair_v2i32_pattern<SDPatternOperator opnode, Instruction INST>
1029 : Pat<(v1i32 (opnode (v2i32 VPR64:$Rn))),
1031 (v2i32 (INST (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rn))),
1034 def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_sminv, SMINPvvv_2S>;
1035 def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_uminv, UMINPvvv_2S>;
1036 def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_smaxv, SMAXPvvv_2S>;
1037 def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_umaxv, UMAXPvvv_2S>;
1038 def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_vaddv, ADDP_2S>;
1040 // Vector Immediate Instructions
// AsmOperandClass family for the optional "LSL #n" / "MSL #n" shifter that
// follows a MOVI/MVNI immediate; one class per shifter flavour.
1042 multiclass neon_mov_imm_shift_asmoperands<string PREFIX>
1044 def _asmoperand : AsmOperandClass
1046 let Name = "NeonMovImmShift" # PREFIX;
1047 let RenderMethod = "addNeonMovImmShift" # PREFIX # "Operands";
1048 let PredicateMethod = "isNeonMovImmShift" # PREFIX;
1052 // Definition of vector immediates shift operands
1054 // The selectable use-cases extract the shift operation
1055 // information from the OpCmode fields encoded in the immediate.
// SDNodeXForm: decode the OpCmode byte back into the raw shift amount used
// by the instruction encodings.  Returns SDValue() when no shift is encoded.
1056 def neon_mod_shift_imm_XFORM : SDNodeXForm<imm, [{
1057 uint64_t OpCmode = N->getZExtValue();
1059 unsigned ShiftOnesIn;
1061 A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn);
1062 if (!HasShift) return SDValue();
1063 return CurDAG->getTargetConstant(ShiftImm, MVT::i32);
1066 // Vector immediates shift operands which accept LSL and MSL
1067 // shift operators with shift value in the range of 0, 8, 16, 24 (LSL),
1068 // or 0, 8 (LSLH) or 8, 16 (MSL).
1069 defm neon_mov_imm_LSL : neon_mov_imm_shift_asmoperands<"LSL">;
1070 defm neon_mov_imm_MSL : neon_mov_imm_shift_asmoperands<"MSL">;
1071 // LSLH restricts shift amount to 0, 8 out of 0, 8, 16, 24
1072 defm neon_mov_imm_LSLH : neon_mov_imm_shift_asmoperands<"LSLH">;
// Operand family tying each shifter flavour to its printer/decoder methods
// and to the matching AsmOperandClass defined above.  `pred` filters which
// encoded OpCmode values this operand accepts.
1074 multiclass neon_mov_imm_shift_operands<string PREFIX,
1075 string HALF, string ISHALF, code pred>
1077 def _operand : Operand<i32>, ImmLeaf<i32, pred, neon_mod_shift_imm_XFORM>
1080 "printNeonMovImmShiftOperand<A64SE::" # PREFIX # ", " # ISHALF # ">";
1082 "DecodeNeonMovImmShiftOperand<A64SE::" # PREFIX # ", " # ISHALF # ">";
1083 let ParserMatchClass =
1084 !cast<AsmOperandClass>("neon_mov_imm_" # PREFIX # HALF # "_asmoperand");
// LSL: shift with zeros shifted in (ShiftOnesIn clear).
1088 defm neon_mov_imm_LSL : neon_mov_imm_shift_operands<"LSL", "", "false", [{
1090 unsigned ShiftOnesIn;
1092 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1093 return (HasShift && !ShiftOnesIn);
// MSL: shift with ones shifted in (ShiftOnesIn set).
1096 defm neon_mov_imm_MSL : neon_mov_imm_shift_operands<"MSL", "", "false", [{
1098 unsigned ShiftOnesIn;
1100 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1101 return (HasShift && ShiftOnesIn);
// LSLH: per-halfword LSL, shift restricted to 0 or 8.
1104 defm neon_mov_imm_LSLH : neon_mov_imm_shift_operands<"LSL", "H", "true", [{
1106 unsigned ShiftOnesIn;
1108 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1109 return (HasShift && !ShiftOnesIn);
// Small unsigned-immediate AsmOperandClasses shared by several encodings.
// NOTE(review): the "{" / "let Name" / "}" lines of these records are
// missing from this copy of the file (gaps in the embedded numbering).
1112 def neon_uimm1_asmoperand : AsmOperandClass
1115 let PredicateMethod = "isUImm<1>";
1116 let RenderMethod = "addImmOperands";
1119 def neon_uimm2_asmoperand : AsmOperandClass
1122 let PredicateMethod = "isUImm<2>";
1123 let RenderMethod = "addImmOperands";
1126 def neon_uimm8_asmoperand : AsmOperandClass
1129 let PredicateMethod = "isUImm<8>";
1130 let RenderMethod = "addImmOperands";
// 8-bit unsigned immediate; ImmLeaf accepts any value (range is enforced by
// the AsmOperandClass predicate), printed in hex.
1133 def neon_uimm8 : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
1134 let ParserMatchClass = neon_uimm8_asmoperand;
1135 let PrintMethod = "printUImmHexOperand";
1138 def neon_uimm64_mask_asmoperand : AsmOperandClass
1140 let Name = "NeonUImm64Mask";
1141 let PredicateMethod = "isNeonUImm64Mask";
1142 let RenderMethod = "addNeonUImm64MaskOperands";
1145 // MCOperand for 64-bit bytemask with each byte having only the
1146 // value 0x00 and 0xff is encoded as an unsigned 8-bit value
1147 def neon_uimm64_mask : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
1148 let ParserMatchClass = neon_uimm64_mask_asmoperand;
1149 let PrintMethod = "printNeonUImm64MaskOperand";
// MOVI/MVNI with an LSL shifter: 8-bit immediate replicated per 32-bit word
// (_2S/_4S, shift 0/8/16/24) or per 16-bit halfword (_4H/_8H, shift 0/8).
// The cmode field is assembled from the Simm bits of the shifter operand.
1152 multiclass NeonI_mov_imm_lsl_sizes<string asmop, bit op,
1153 SDPatternOperator opnode>
1155 // shift zeros, per word
1156 def _2S : NeonI_1VModImm<0b0, op,
1158 (ins neon_uimm8:$Imm,
1159 neon_mov_imm_LSL_operand:$Simm),
1160 !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"),
1161 [(set (v2i32 VPR64:$Rd),
1162 (v2i32 (opnode (timm:$Imm),
1163 (neon_mov_imm_LSL_operand:$Simm))))],
1166 let cmode = {0b0, Simm{1}, Simm{0}, 0b0};
1169 def _4S : NeonI_1VModImm<0b1, op,
1171 (ins neon_uimm8:$Imm,
1172 neon_mov_imm_LSL_operand:$Simm),
1173 !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"),
1174 [(set (v4i32 VPR128:$Rd),
1175 (v4i32 (opnode (timm:$Imm),
1176 (neon_mov_imm_LSL_operand:$Simm))))],
1179 let cmode = {0b0, Simm{1}, Simm{0}, 0b0};
1182 // shift zeros, per halfword
1183 def _4H : NeonI_1VModImm<0b0, op,
1185 (ins neon_uimm8:$Imm,
1186 neon_mov_imm_LSLH_operand:$Simm),
1187 !strconcat(asmop, "\t$Rd.4h, $Imm$Simm"),
1188 [(set (v4i16 VPR64:$Rd),
1189 (v4i16 (opnode (timm:$Imm),
1190 (neon_mov_imm_LSLH_operand:$Simm))))],
1193 let cmode = {0b1, 0b0, Simm, 0b0};
1196 def _8H : NeonI_1VModImm<0b1, op,
1198 (ins neon_uimm8:$Imm,
1199 neon_mov_imm_LSLH_operand:$Simm),
1200 !strconcat(asmop, "\t$Rd.8h, $Imm$Simm"),
1201 [(set (v8i16 VPR128:$Rd),
1202 (v8i16 (opnode (timm:$Imm),
1203 (neon_mov_imm_LSLH_operand:$Simm))))],
1206 let cmode = {0b1, 0b0, Simm, 0b0};
// BIC/ORR immediate forms: read-modify-write variants of the shifted
// immediate encodings.  $src is tied to $Rd; the DAG shows the combining
// opnode (and/or) applied to $src and the materialized immediate vector.
// NOTE(review): the _4H/_8H patterns below feed neon_mov_imm_LSL_operand to
// the neonopnode while their ins lists use neon_mov_imm_LSLH_operand —
// confirm this asymmetry against the upstream file.
1210 multiclass NeonI_mov_imm_with_constraint_lsl_sizes<string asmop, bit op,
1211 SDPatternOperator opnode,
1212 SDPatternOperator neonopnode>
1214 let Constraints = "$src = $Rd" in {
1215 // shift zeros, per word
1216 def _2S : NeonI_1VModImm<0b0, op,
1218 (ins VPR64:$src, neon_uimm8:$Imm,
1219 neon_mov_imm_LSL_operand:$Simm),
1220 !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"),
1221 [(set (v2i32 VPR64:$Rd),
1222 (v2i32 (opnode (v2i32 VPR64:$src),
1223 (v2i32 (bitconvert (v2i32 (neonopnode timm:$Imm,
1224 neon_mov_imm_LSL_operand:$Simm)))))))],
1227 let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
1230 def _4S : NeonI_1VModImm<0b1, op,
1232 (ins VPR128:$src, neon_uimm8:$Imm,
1233 neon_mov_imm_LSL_operand:$Simm),
1234 !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"),
1235 [(set (v4i32 VPR128:$Rd),
1236 (v4i32 (opnode (v4i32 VPR128:$src),
1237 (v4i32 (bitconvert (v4i32 (neonopnode timm:$Imm,
1238 neon_mov_imm_LSL_operand:$Simm)))))))],
1241 let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
1244 // shift zeros, per halfword
1245 def _4H : NeonI_1VModImm<0b0, op,
1247 (ins VPR64:$src, neon_uimm8:$Imm,
1248 neon_mov_imm_LSLH_operand:$Simm),
1249 !strconcat(asmop, "\t$Rd.4h, $Imm$Simm"),
1250 [(set (v4i16 VPR64:$Rd),
1251 (v4i16 (opnode (v4i16 VPR64:$src),
1252 (v4i16 (bitconvert (v4i16 (neonopnode timm:$Imm,
1253 neon_mov_imm_LSL_operand:$Simm)))))))],
1256 let cmode = {0b1, 0b0, Simm, 0b1};
1259 def _8H : NeonI_1VModImm<0b1, op,
1261 (ins VPR128:$src, neon_uimm8:$Imm,
1262 neon_mov_imm_LSLH_operand:$Simm),
1263 !strconcat(asmop, "\t$Rd.8h, $Imm$Simm"),
1264 [(set (v8i16 VPR128:$Rd),
1265 (v8i16 (opnode (v8i16 VPR128:$src),
1266 (v8i16 (bitconvert (v8i16 (neonopnode timm:$Imm,
1267 neon_mov_imm_LSL_operand:$Simm)))))))],
1270 let cmode = {0b1, 0b0, Simm, 0b1};
// MOVI/MVNI with an MSL ("shift ones") shifter: per-32-bit-word only, shift
// amount 8 or 16, with ones shifted into the low bits.
1275 multiclass NeonI_mov_imm_msl_sizes<string asmop, bit op,
1276 SDPatternOperator opnode>
1278 // shift ones, per word
1279 def _2S : NeonI_1VModImm<0b0, op,
1281 (ins neon_uimm8:$Imm,
1282 neon_mov_imm_MSL_operand:$Simm),
1283 !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"),
1284 [(set (v2i32 VPR64:$Rd),
1285 (v2i32 (opnode (timm:$Imm),
1286 (neon_mov_imm_MSL_operand:$Simm))))],
1289 let cmode = {0b1, 0b1, 0b0, Simm};
1292 def _4S : NeonI_1VModImm<0b1, op,
1294 (ins neon_uimm8:$Imm,
1295 neon_mov_imm_MSL_operand:$Simm),
1296 !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"),
1297 [(set (v4i32 VPR128:$Rd),
1298 (v4i32 (opnode (timm:$Imm),
1299 (neon_mov_imm_MSL_operand:$Simm))))],
1302 let cmode = {0b1, 0b1, 0b0, Simm};
// Immediate-materializing instructions are pure constants, hence
// rematerializable.  NOTE(review): the closing "}" of each let-block and the
// opnode arguments of BICvi_lsl/ORRvi_lsl are missing from this copy.
1306 // Vector Move Immediate Shifted
1307 let isReMaterializable = 1 in {
1308 defm MOVIvi_lsl : NeonI_mov_imm_lsl_sizes<"movi", 0b0, Neon_movi>;
1311 // Vector Move Inverted Immediate Shifted
1312 let isReMaterializable = 1 in {
1313 defm MVNIvi_lsl : NeonI_mov_imm_lsl_sizes<"mvni", 0b1, Neon_mvni>;
1316 // Vector Bitwise Bit Clear (AND NOT) - immediate
1317 let isReMaterializable = 1 in {
1318 defm BICvi_lsl : NeonI_mov_imm_with_constraint_lsl_sizes<"bic", 0b1,
1322 // Vector Bitwise OR - immediate
1324 let isReMaterializable = 1 in {
1325 defm ORRvi_lsl : NeonI_mov_imm_with_constraint_lsl_sizes<"orr", 0b0,
1329 // Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immediate
1330 // LowerBUILD_VECTOR favors lowering MOVI over MVNI.
1331 // BIC immediate instructions selection requires additional patterns to
1332 // transform Neon_movi operands into BIC immediate operands
// SDNodeXForm that flips the encoded halfword shift (0 <-> 1, i.e. LSL #0
// <-> LSL #8) so a MOVI-of-0xff can be re-expressed as BIC-of-0x00.
1334 def neon_mov_imm_LSLH_transform_XFORM : SDNodeXForm<imm, [{
1335 uint64_t OpCmode = N->getZExtValue();
1337 unsigned ShiftOnesIn;
1338 (void)A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn);
1339 // LSLH restricts shift amount to 0, 8 which are encoded as 0 and 1
1340 // Transform encoded shift amount 0 to 1 and 1 to 0.
1341 return CurDAG->getTargetConstant(!ShiftImm, MVT::i32);
// Operand accepting LSL-encoded (zeros shifted in) shifter values, with the
// flip transform above applied on match.
1344 def neon_mov_imm_LSLH_transform_operand
1347 unsigned ShiftOnesIn;
1349 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1350 return (HasShift && !ShiftOnesIn); }],
1351 neon_mov_imm_LSLH_transform_XFORM>;
// AND with a halfword mask of 0xff in one byte is equivalent to clearing the
// other byte: select BIC with immediate 0 and the opposite shift.
1353 // Transform (and A, (4h Neon_movi 0xff)) -> BIC 4h (A, 0x00, LSL 8)
1354 // Transform (and A, (4h Neon_movi 0xff LSL #8)) -> BIC 4h (A, 0x00)
1355 def : Pat<(v4i16 (and VPR64:$src,
1356 (v4i16 (Neon_movi 255, neon_mov_imm_LSLH_transform_operand:$Simm)))),
1357 (BICvi_lsl_4H VPR64:$src, 0,
1358 neon_mov_imm_LSLH_transform_operand:$Simm)>;
1360 // Transform (and A, (8h Neon_movi 8h 0xff)) -> BIC 8h (A, 0x00, LSL 8)
1361 // Transform (and A, (8h Neon_movi 0xff LSL #8)) -> BIC 8h (A, 0x00)
1362 def : Pat<(v8i16 (and VPR128:$src,
1363 (v8i16 (Neon_movi 255, neon_mov_imm_LSLH_transform_operand:$Simm)))),
1364 (BICvi_lsl_8H VPR128:$src, 0,
1365 neon_mov_imm_LSLH_transform_operand:$Simm)>;
// Select BIC/ORR (halfword-immediate forms) for bitwise ops whose constant
// operand was materialized as a per-halfword MOVI/MVNI, when the surrounding
// vector type differs from v4i16/v8i16 (hence the bitconverts): v8i8/v1i64
// wrappers use the 64-bit _4H instruction, v16i8/v4i32/v2i64 the _8H one.
1368 multiclass Neon_bitwiseVi_patterns<SDPatternOperator opnode,
1369 SDPatternOperator neonopnode,
1371 Instruction INST8H> {
1372 def : Pat<(v8i8 (opnode VPR64:$src,
1373 (bitconvert(v4i16 (neonopnode timm:$Imm,
1374 neon_mov_imm_LSLH_operand:$Simm))))),
1375 (INST4H VPR64:$src, neon_uimm8:$Imm,
1376 neon_mov_imm_LSLH_operand:$Simm)>;
1377 def : Pat<(v1i64 (opnode VPR64:$src,
1378 (bitconvert(v4i16 (neonopnode timm:$Imm,
1379 neon_mov_imm_LSLH_operand:$Simm))))),
1380 (INST4H VPR64:$src, neon_uimm8:$Imm,
1381 neon_mov_imm_LSLH_operand:$Simm)>;
1383 def : Pat<(v16i8 (opnode VPR128:$src,
1384 (bitconvert(v8i16 (neonopnode timm:$Imm,
1385 neon_mov_imm_LSLH_operand:$Simm))))),
1386 (INST8H VPR128:$src, neon_uimm8:$Imm,
1387 neon_mov_imm_LSLH_operand:$Simm)>;
1388 def : Pat<(v4i32 (opnode VPR128:$src,
1389 (bitconvert(v8i16 (neonopnode timm:$Imm,
1390 neon_mov_imm_LSLH_operand:$Simm))))),
1391 (INST8H VPR128:$src, neon_uimm8:$Imm,
1392 neon_mov_imm_LSLH_operand:$Simm)>;
1393 def : Pat<(v2i64 (opnode VPR128:$src,
1394 (bitconvert(v8i16 (neonopnode timm:$Imm,
1395 neon_mov_imm_LSLH_operand:$Simm))))),
1396 (INST8H VPR128:$src, neon_uimm8:$Imm,
1397 neon_mov_imm_LSLH_operand:$Simm)>;
1400 // Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immediate
// BIC computes Rd = src & ~(imm << shift), so the DAG shape to match is an
// AND whose constant operand is the inverted-immediate node:
//   (and A, (Neon_mvni imm, shift))  -->  (BIC A, imm, shift)
// The opnode must therefore be `and`; an `or` with an MVNI constant can
// never denote a bit-clear operation.
1401 defm : Neon_bitwiseVi_patterns<and, Neon_mvni, BICvi_lsl_4H, BICvi_lsl_8H>;
// ORR immediate: (or A, (Neon_movi imm, shift)) --> (ORR A, imm, shift).
1403 // Additional patterns for Vector Bitwise OR - immediate
1404 defm : Neon_bitwiseVi_patterns<or, Neon_movi, ORRvi_lsl_4H, ORRvi_lsl_8H>;
1407 // Vector Move Immediate Masked
1408 let isReMaterializable = 1 in {
1409 defm MOVIvi_msl : NeonI_mov_imm_msl_sizes<"movi", 0b0, Neon_movi>;
1412 // Vector Move Inverted Immediate Masked
1413 let isReMaterializable = 1 in {
1414 defm MVNIvi_msl : NeonI_mov_imm_msl_sizes<"mvni", 0b1, Neon_mvni>;
// Assembly aliases accepting the shifter-less spelling "op Vd.T, #imm",
// which renders as the underlying instruction with an LSL #0 shifter.
1417 class NeonI_mov_imm_lsl_aliases<string asmop, string asmlane,
1418 Instruction inst, RegisterOperand VPRC>
1419 : NeonInstAlias<!strconcat(asmop, "\t$Rd," # asmlane # ", $Imm"),
1420 (inst VPRC:$Rd, neon_uimm8:$Imm, 0), 0b0>;
1422 // Aliases for Vector Move Immediate Shifted
1423 def : NeonI_mov_imm_lsl_aliases<"movi", ".2s", MOVIvi_lsl_2S, VPR64>;
1424 def : NeonI_mov_imm_lsl_aliases<"movi", ".4s", MOVIvi_lsl_4S, VPR128>;
1425 def : NeonI_mov_imm_lsl_aliases<"movi", ".4h", MOVIvi_lsl_4H, VPR64>;
1426 def : NeonI_mov_imm_lsl_aliases<"movi", ".8h", MOVIvi_lsl_8H, VPR128>;
1428 // Aliases for Vector Move Inverted Immediate Shifted
1429 def : NeonI_mov_imm_lsl_aliases<"mvni", ".2s", MVNIvi_lsl_2S, VPR64>;
1430 def : NeonI_mov_imm_lsl_aliases<"mvni", ".4s", MVNIvi_lsl_4S, VPR128>;
1431 def : NeonI_mov_imm_lsl_aliases<"mvni", ".4h", MVNIvi_lsl_4H, VPR64>;
1432 def : NeonI_mov_imm_lsl_aliases<"mvni", ".8h", MVNIvi_lsl_8H, VPR128>;
1434 // Aliases for Vector Bitwise Bit Clear (AND NOT) - immediate
1435 def : NeonI_mov_imm_lsl_aliases<"bic", ".2s", BICvi_lsl_2S, VPR64>;
1436 def : NeonI_mov_imm_lsl_aliases<"bic", ".4s", BICvi_lsl_4S, VPR128>;
1437 def : NeonI_mov_imm_lsl_aliases<"bic", ".4h", BICvi_lsl_4H, VPR64>;
1438 def : NeonI_mov_imm_lsl_aliases<"bic", ".8h", BICvi_lsl_8H, VPR128>;
1440 // Aliases for Vector Bitwise OR - immediate
1441 def : NeonI_mov_imm_lsl_aliases<"orr", ".2s", ORRvi_lsl_2S, VPR64>;
1442 def : NeonI_mov_imm_lsl_aliases<"orr", ".4s", ORRvi_lsl_4S, VPR128>;
1443 def : NeonI_mov_imm_lsl_aliases<"orr", ".4h", ORRvi_lsl_4H, VPR64>;
1444 def : NeonI_mov_imm_lsl_aliases<"orr", ".8h", ORRvi_lsl_8H, VPR128>;
1446 // Vector Move Immediate - per byte
1447 let isReMaterializable = 1 in {
1448 def MOVIvi_8B : NeonI_1VModImm<0b0, 0b0,
1449 (outs VPR64:$Rd), (ins neon_uimm8:$Imm),
1450 "movi\t$Rd.8b, $Imm",
1451 [(set (v8i8 VPR64:$Rd),
1452 (v8i8 (Neon_movi (timm:$Imm), (i32 imm))))],
1457 def MOVIvi_16B : NeonI_1VModImm<0b1, 0b0,
1458 (outs VPR128:$Rd), (ins neon_uimm8:$Imm),
1459 "movi\t$Rd.16b, $Imm",
1460 [(set (v16i8 VPR128:$Rd),
1461 (v16i8 (Neon_movi (timm:$Imm), (i32 imm))))],
// 2D bytemask form: the 8-bit immediate selects 0x00/0xff per byte of each
// 64-bit element.
// NOTE(review): the asm string below has a space after \t ("movi\t $Rd.2d")
// unlike the other MOVI forms — possible typo; left untouched here since it
// changes assembler output.
1467 // Vector Move Immediate - bytemask, per double word
1468 let isReMaterializable = 1 in {
1469 def MOVIvi_2D : NeonI_1VModImm<0b1, 0b1,
1470 (outs VPR128:$Rd), (ins neon_uimm64_mask:$Imm),
1471 "movi\t $Rd.2d, $Imm",
1472 [(set (v2i64 VPR128:$Rd),
1473 (v2i64 (Neon_movi (timm:$Imm), (i32 imm))))],
// Scalar (FPR64) bytemask form; asm string line is missing from this copy.
1479 // Vector Move Immediate - bytemask, one doubleword
1481 let isReMaterializable = 1 in {
1482 def MOVIdi : NeonI_1VModImm<0b0, 0b1,
1483 (outs FPR64:$Rd), (ins neon_uimm64_mask:$Imm),
1485 [(set (v1i64 FPR64:$Rd),
1486 (v1i64 (Neon_movi (timm:$Imm), (i32 imm))))],
// FMOV (vector, immediate): materialize a splatted FP constant.
1492 // Vector Floating Point Move Immediate
1494 class NeonI_FMOV_impl<string asmlane, RegisterOperand VPRC, ValueType OpTy,
1495 Operand immOpType, bit q, bit op>
1496 : NeonI_1VModImm<q, op,
1497 (outs VPRC:$Rd), (ins immOpType:$Imm),
1498 "fmov\t$Rd" # asmlane # ", $Imm",
1499 [(set (OpTy VPRC:$Rd),
1500 (OpTy (Neon_fmovi (timm:$Imm))))],
1505 let isReMaterializable = 1 in {
1506 def FMOVvi_2S : NeonI_FMOV_impl<".2s", VPR64, v2f32, fmov32_operand, 0b0, 0b0>;
1507 def FMOVvi_4S : NeonI_FMOV_impl<".4s", VPR128, v4f32, fmov32_operand, 0b1, 0b0>;
1508 def FMOVvi_2D : NeonI_FMOV_impl<".2d", VPR128, v2f64, fmov64_operand, 0b1, 0b1>;
1511 // Vector Shift (Immediate)
1512 // Immediate in [0, 63]
// Plain i32 operand for 6-bit shift amounts; range is enforced by the
// uimm6 AsmOperandClass.
1513 def imm0_63 : Operand<i32> {
1514 let ParserMatchClass = uimm6_asmoperand;
1517 // Shift Right/Left Immediate - The immh:immb field of these shifts is encoded
1521 // 8 immh:immb<6:3> = '0001xxx', <imm> is encoded in immh:immb<2:0>
1522 // 16 immh:immb<6:4> = '001xxxx', <imm> is encoded in immh:immb<3:0>
1523 // 32 immh:immb<6:5> = '01xxxxx', <imm> is encoded in immh:immb<4:0>
1524 // 64 immh:immb<6> = '1xxxxxx', <imm> is encoded in immh:immb<5:0>
1526 // The shift right immediate amount, in the range 1 to element bits, is computed
1527 // as Offset - UInt(immh:immb). The shift left immediate amount, in the range 0
1528 // to element bits - 1, is computed as UInt(immh:immb) - Offset.
// Right-shift immediate operand family: valid range is [1, element bits],
// encoded as (2 * element bits) - amount in immh:immb.
1530 class shr_imm_asmoperands<string OFFSET> : AsmOperandClass {
1531 let Name = "ShrImm" # OFFSET;
1532 let RenderMethod = "addImmOperands";
1533 let DiagnosticType = "ShrImm" # OFFSET;
1536 class shr_imm<string OFFSET> : Operand<i32> {
1537 let EncoderMethod = "getShiftRightImm" # OFFSET;
1538 let DecoderMethod = "DecodeShiftRightImm" # OFFSET;
1539 let ParserMatchClass =
1540 !cast<AsmOperandClass>("shr_imm" # OFFSET # "_asmoperand");
1543 def shr_imm8_asmoperand : shr_imm_asmoperands<"8">;
1544 def shr_imm16_asmoperand : shr_imm_asmoperands<"16">;
1545 def shr_imm32_asmoperand : shr_imm_asmoperands<"32">;
1546 def shr_imm64_asmoperand : shr_imm_asmoperands<"64">;
// ImmLeaf predicates pin the legal range (1..N) per element width.
1548 def shr_imm8 : shr_imm<"8">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 8;}]>;
1549 def shr_imm16 : shr_imm<"16">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 16;}]>;
1550 def shr_imm32 : shr_imm<"32">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 32;}]>;
1551 def shr_imm64 : shr_imm<"64">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 64;}]>;
// Left-shift immediate operand family: valid range is [0, element bits - 1],
// encoded as element bits + amount in immh:immb.
1553 class shl_imm_asmoperands<string OFFSET> : AsmOperandClass {
1554 let Name = "ShlImm" # OFFSET;
1555 let RenderMethod = "addImmOperands";
1556 let DiagnosticType = "ShlImm" # OFFSET;
1559 class shl_imm<string OFFSET> : Operand<i32> {
1560 let EncoderMethod = "getShiftLeftImm" # OFFSET;
1561 let DecoderMethod = "DecodeShiftLeftImm" # OFFSET;
1562 let ParserMatchClass =
1563 !cast<AsmOperandClass>("shl_imm" # OFFSET # "_asmoperand");
1566 def shl_imm8_asmoperand : shl_imm_asmoperands<"8">;
1567 def shl_imm16_asmoperand : shl_imm_asmoperands<"16">;
1568 def shl_imm32_asmoperand : shl_imm_asmoperands<"32">;
1569 def shl_imm64_asmoperand : shl_imm_asmoperands<"64">;
1571 def shl_imm8 : shl_imm<"8">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 8;}]>;
1572 def shl_imm16 : shl_imm<"16">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 16;}]>;
1573 def shl_imm32 : shl_imm<"32">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 32;}]>;
1574 def shl_imm64 : shl_imm<"64">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 64;}]>;
// Two-register shift-by-immediate.  The shift amount is splatted via
// Neon_vdup so generic DAG shl/sra/srl-by-splat patterns select these.
1576 class N2VShift<bit q, bit u, bits<5> opcode, string asmop, string T,
1577 RegisterOperand VPRC, ValueType Ty, Operand ImmTy, SDNode OpNode>
1578 : NeonI_2VShiftImm<q, u, opcode,
1579 (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
1580 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1581 [(set (Ty VPRC:$Rd),
1582 (Ty (OpNode (Ty VPRC:$Rn),
1583 (Ty (Neon_vdup (i32 ImmTy:$Imm))))))],
// Shift-left immediate: the Inst{22..} assignments place the leading-one
// marker in immh that encodes the element size (see the comment block above
// shr_imm_asmoperands).
1586 multiclass NeonI_N2VShL<bit u, bits<5> opcode, string asmop> {
1587 // 64-bit vector types.
1588 def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shl_imm8, shl> {
1589 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
1592 def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shl_imm16, shl> {
1593 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
1596 def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shl_imm32, shl> {
1597 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
1600 // 128-bit vector types.
1601 def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shl_imm8, shl> {
1602 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
1605 def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shl_imm16, shl> {
1606 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
1609 def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shl_imm32, shl> {
1610 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
1613 def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shl_imm64, shl> {
1614 let Inst{22} = 0b1; // immh:immb = 1xxxxxx
// Shift-right immediate: same size encoding; OpNode selects sra (signed)
// or srl (unsigned).
1618 multiclass NeonI_N2VShR<bit u, bits<5> opcode, string asmop, SDNode OpNode> {
1619 def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1621 let Inst{22-19} = 0b0001;
1624 def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1626 let Inst{22-20} = 0b001;
1629 def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1631 let Inst{22-21} = 0b01;
1634 def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1636 let Inst{22-19} = 0b0001;
1639 def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1641 let Inst{22-20} = 0b001;
1644 def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1646 let Inst{22-21} = 0b01;
1649 def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
// Shift Left / Shift Right (immediate) instantiations.
1656 defm SHLvvi : NeonI_N2VShL<0b0, 0b01010, "shl">;
1659 defm SSHRvvi : NeonI_N2VShR<0b0, 0b00000, "sshr", sra>;
1660 defm USHRvvi : NeonI_N2VShR<0b1, 0b00000, "ushr", srl>;
// PatFrags selecting the high half of a 128-bit vector (index = half the
// lane count) ...
1662 def Neon_High16B : PatFrag<(ops node:$in),
1663 (extract_subvector (v16i8 node:$in), (iPTR 8))>;
1664 def Neon_High8H : PatFrag<(ops node:$in),
1665 (extract_subvector (v8i16 node:$in), (iPTR 4))>;
1666 def Neon_High4S : PatFrag<(ops node:$in),
1667 (extract_subvector (v4i32 node:$in), (iPTR 2))>;
1668 def Neon_High2D : PatFrag<(ops node:$in),
1669 (extract_subvector (v2i64 node:$in), (iPTR 1))>;
1670 def Neon_High4float : PatFrag<(ops node:$in),
1671 (extract_subvector (v4f32 node:$in), (iPTR 2))>;
1672 def Neon_High2double : PatFrag<(ops node:$in),
1673 (extract_subvector (v2f64 node:$in), (iPTR 1))>;
// ... and the low half (index 0; the index lines are missing from this copy).
1675 def Neon_Low16B : PatFrag<(ops node:$in),
1676 (v8i8 (extract_subvector (v16i8 node:$in),
1678 def Neon_Low8H : PatFrag<(ops node:$in),
1679 (v4i16 (extract_subvector (v8i16 node:$in),
1681 def Neon_Low4S : PatFrag<(ops node:$in),
1682 (v2i32 (extract_subvector (v4i32 node:$in),
1684 def Neon_Low2D : PatFrag<(ops node:$in),
1685 (v1i64 (extract_subvector (v2i64 node:$in),
1687 def Neon_Low4float : PatFrag<(ops node:$in),
1688 (v2f32 (extract_subvector (v4f32 node:$in),
1690 def Neon_Low2double : PatFrag<(ops node:$in),
1691 (v1f64 (extract_subvector (v2f64 node:$in),
1694 class N2VShiftLong<bit q, bit u, bits<5> opcode, string asmop, string DestT,
1695 string SrcT, ValueType DestTy, ValueType SrcTy,
1696 Operand ImmTy, SDPatternOperator ExtOp>
1697 : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
1698 (ins VPR64:$Rn, ImmTy:$Imm),
1699 asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
1700 [(set (DestTy VPR128:$Rd),
1702 (DestTy (ExtOp (SrcTy VPR64:$Rn))),
1703 (DestTy (Neon_vdup (i32 ImmTy:$Imm))))))],
1706 class N2VShiftLongHigh<bit q, bit u, bits<5> opcode, string asmop, string DestT,
1707 string SrcT, ValueType DestTy, ValueType SrcTy,
1708 int StartIndex, Operand ImmTy,
1709 SDPatternOperator ExtOp, PatFrag getTop>
1710 : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
1711 (ins VPR128:$Rn, ImmTy:$Imm),
1712 asmop # "2\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
1713 [(set (DestTy VPR128:$Rd),
1716 (SrcTy (getTop VPR128:$Rn)))),
1717 (DestTy (Neon_vdup (i32 ImmTy:$Imm))))))],
1720 multiclass NeonI_N2VShLL<string prefix, bit u, bits<5> opcode, string asmop,
1722 // 64-bit vector types.
1723 def _8B : N2VShiftLong<0b0, u, opcode, asmop, "8h", "8b", v8i16, v8i8,
1725 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
1728 def _4H : N2VShiftLong<0b0, u, opcode, asmop, "4s", "4h", v4i32, v4i16,
1730 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
1733 def _2S : N2VShiftLong<0b0, u, opcode, asmop, "2d", "2s", v2i64, v2i32,
1735 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
1738 // 128-bit vector types
1739 def _16B : N2VShiftLongHigh<0b1, u, opcode, asmop, "8h", "16b", v8i16, v8i8,
1740 8, shl_imm8, ExtOp, Neon_High16B> {
1741 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
1744 def _8H : N2VShiftLongHigh<0b1, u, opcode, asmop, "4s", "8h", v4i32, v4i16,
1745 4, shl_imm16, ExtOp, Neon_High8H> {
1746 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
1749 def _4S : N2VShiftLongHigh<0b1, u, opcode, asmop, "2d", "4s", v2i64, v2i32,
1750 2, shl_imm32, ExtOp, Neon_High4S> {
1751 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
1754 // Use other patterns to match when the immediate is 0.
1755 def : Pat<(v8i16 (ExtOp (v8i8 VPR64:$Rn))),
1756 (!cast<Instruction>(prefix # "_8B") VPR64:$Rn, 0)>;
1758 def : Pat<(v4i32 (ExtOp (v4i16 VPR64:$Rn))),
1759 (!cast<Instruction>(prefix # "_4H") VPR64:$Rn, 0)>;
1761 def : Pat<(v2i64 (ExtOp (v2i32 VPR64:$Rn))),
1762 (!cast<Instruction>(prefix # "_2S") VPR64:$Rn, 0)>;
1764 def : Pat<(v8i16 (ExtOp (v8i8 (Neon_High16B VPR128:$Rn)))),
1765 (!cast<Instruction>(prefix # "_16B") VPR128:$Rn, 0)>;
1767 def : Pat<(v4i32 (ExtOp (v4i16 (Neon_High8H VPR128:$Rn)))),
1768 (!cast<Instruction>(prefix # "_8H") VPR128:$Rn, 0)>;
1770 def : Pat<(v2i64 (ExtOp (v2i32 (Neon_High4S VPR128:$Rn)))),
1771 (!cast<Instruction>(prefix # "_4S") VPR128:$Rn, 0)>;
// Shift left long (sshll/ushll): widen each lane via sext/zext, then shift.
// The multiclass also needs its own prefix string so it can self-reference
// its instructions for the shift-by-zero patterns.
1775 defm SSHLLvvi : NeonI_N2VShLL<"SSHLLvvi", 0b0, 0b10100, "sshll", sext>;
1776 defm USHLLvvi : NeonI_N2VShLL<"USHLLvvi", 0b1, 0b10100, "ushll", zext>;
1778 // Rounding/Saturating shift
1779 class N2VShift_RQ<bit q, bit u, bits<5> opcode, string asmop, string T,
1780 RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1781 SDPatternOperator OpNode>
1782 : NeonI_2VShiftImm<q, u, opcode,
1783 (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
1784 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1785 [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$Rn),
1786 (i32 ImmTy:$Imm))))],
1789 // shift right (vector by immediate)
1790 multiclass NeonI_N2VShR_RQ<bit u, bits<5> opcode, string asmop,
1791 SDPatternOperator OpNode> {
1792 def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1794 let Inst{22-19} = 0b0001;
1797 def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1799 let Inst{22-20} = 0b001;
1802 def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1804 let Inst{22-21} = 0b01;
1807 def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1809 let Inst{22-19} = 0b0001;
1812 def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1814 let Inst{22-20} = 0b001;
1817 def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1819 let Inst{22-21} = 0b01;
1822 def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1828 multiclass NeonI_N2VShL_Q<bit u, bits<5> opcode, string asmop,
1829 SDPatternOperator OpNode> {
1830 // 64-bit vector types.
1831 def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shl_imm8,
1833 let Inst{22-19} = 0b0001;
1836 def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shl_imm16,
1838 let Inst{22-20} = 0b001;
1841 def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shl_imm32,
1843 let Inst{22-21} = 0b01;
1846 // 128-bit vector types.
1847 def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shl_imm8,
1849 let Inst{22-19} = 0b0001;
1852 def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shl_imm16,
1854 let Inst{22-20} = 0b001;
1857 def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shl_imm32,
1859 let Inst{22-21} = 0b01;
1862 def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shl_imm64,
// Rounding shift right (srshr/urshr), matched via target intrinsics.
1869 defm SRSHRvvi : NeonI_N2VShR_RQ<0b0, 0b00100, "srshr",
1870 int_aarch64_neon_vsrshr>;
1871 defm URSHRvvi : NeonI_N2VShR_RQ<0b1, 0b00100, "urshr",
1872 int_aarch64_neon_vurshr>;
1874 // Saturating shift left unsigned (signed input, unsigned saturation).
1875 defm SQSHLUvvi : NeonI_N2VShL_Q<0b1, 0b01100, "sqshlu", int_aarch64_neon_vsqshlu>;
1877 // Saturating shift left by immediate; matched via the NEON_QSHL DAG nodes.
1878 defm SQSHLvvi : NeonI_N2VShL_Q<0b0, 0b01110, "sqshl", Neon_sqrshlImm>;
1879 defm UQSHLvvi : NeonI_N2VShL_Q<0b1, 0b01110, "uqshl", Neon_uqrshlImm>;
1881 class N2VShiftAdd<bit q, bit u, bits<5> opcode, string asmop, string T,
1882 RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1884 : NeonI_2VShiftImm<q, u, opcode,
1885 (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
1886 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1887 [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
1888 (Ty (OpNode (Ty VPRC:$Rn),
1889 (Ty (Neon_vdup (i32 ImmTy:$Imm))))))))],
1891 let Constraints = "$src = $Rd";
1894 // Shift Right accumulate
1895 multiclass NeonI_N2VShRAdd<bit u, bits<5> opcode, string asmop, SDNode OpNode> {
1896 def _8B : N2VShiftAdd<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1898 let Inst{22-19} = 0b0001;
1901 def _4H : N2VShiftAdd<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1903 let Inst{22-20} = 0b001;
1906 def _2S : N2VShiftAdd<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1908 let Inst{22-21} = 0b01;
1911 def _16B : N2VShiftAdd<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1913 let Inst{22-19} = 0b0001;
1916 def _8H : N2VShiftAdd<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1918 let Inst{22-20} = 0b001;
1921 def _4S : N2VShiftAdd<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1923 let Inst{22-21} = 0b01;
1926 def _2D : N2VShiftAdd<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
// Shift right and accumulate (ssra/usra): Rd += (Rn >> imm), signed (sra)
// or unsigned (srl) shift. Use 0b0/0b1 for the u bit, consistent with the
// other defm instantiations in this file.
1933 defm SSRAvvi : NeonI_N2VShRAdd<0b0, 0b00010, "ssra", sra>;
1934 defm USRAvvi : NeonI_N2VShRAdd<0b1, 0b00010, "usra", srl>;
1936 // Rounding shift accumulate
1937 class N2VShiftAdd_R<bit q, bit u, bits<5> opcode, string asmop, string T,
1938 RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1939 SDPatternOperator OpNode>
1940 : NeonI_2VShiftImm<q, u, opcode,
1941 (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
1942 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1943 [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
1944 (Ty (OpNode (Ty VPRC:$Rn), (i32 ImmTy:$Imm))))))],
1946 let Constraints = "$src = $Rd";
1949 multiclass NeonI_N2VShRAdd_R<bit u, bits<5> opcode, string asmop,
1950 SDPatternOperator OpNode> {
1951 def _8B : N2VShiftAdd_R<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1953 let Inst{22-19} = 0b0001;
1956 def _4H : N2VShiftAdd_R<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1958 let Inst{22-20} = 0b001;
1961 def _2S : N2VShiftAdd_R<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1963 let Inst{22-21} = 0b01;
1966 def _16B : N2VShiftAdd_R<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1968 let Inst{22-19} = 0b0001;
1971 def _8H : N2VShiftAdd_R<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1973 let Inst{22-20} = 0b001;
1976 def _4S : N2VShiftAdd_R<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1978 let Inst{22-21} = 0b01;
1981 def _2D : N2VShiftAdd_R<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
// Rounding shift right and accumulate (srsra/ursra): Rd += round(Rn >> imm),
// via the rounding-shift intrinsics. Use 0b0/0b1 for the u bit, consistent
// with the other defm instantiations in this file.
1988 defm SRSRAvvi : NeonI_N2VShRAdd_R<0b0, 0b00110, "srsra", int_aarch64_neon_vsrshr>;
1989 defm URSRAvvi : NeonI_N2VShRAdd_R<0b1, 0b00110, "ursra", int_aarch64_neon_vurshr>;
1991 // Shift insert by immediate
1992 class N2VShiftIns<bit q, bit u, bits<5> opcode, string asmop, string T,
1993 RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1994 SDPatternOperator OpNode>
1995 : NeonI_2VShiftImm<q, u, opcode,
1996 (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
1997 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1998 [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$src), (Ty VPRC:$Rn),
1999 (i32 ImmTy:$Imm))))],
2001 let Constraints = "$src = $Rd";
2004 // shift left insert (vector by immediate)
2005 multiclass NeonI_N2VShLIns<bit u, bits<5> opcode, string asmop> {
2006 def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shl_imm8,
2007 int_aarch64_neon_vsli> {
2008 let Inst{22-19} = 0b0001;
2011 def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shl_imm16,
2012 int_aarch64_neon_vsli> {
2013 let Inst{22-20} = 0b001;
2016 def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shl_imm32,
2017 int_aarch64_neon_vsli> {
2018 let Inst{22-21} = 0b01;
2021 // 128-bit vector types
2022 def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shl_imm8,
2023 int_aarch64_neon_vsli> {
2024 let Inst{22-19} = 0b0001;
2027 def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shl_imm16,
2028 int_aarch64_neon_vsli> {
2029 let Inst{22-20} = 0b001;
2032 def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shl_imm32,
2033 int_aarch64_neon_vsli> {
2034 let Inst{22-21} = 0b01;
2037 def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shl_imm64,
2038 int_aarch64_neon_vsli> {
2043 // shift right insert (vector by immediate)
2044 multiclass NeonI_N2VShRIns<bit u, bits<5> opcode, string asmop> {
2045 // 64-bit vector types.
2046 def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
2047 int_aarch64_neon_vsri> {
2048 let Inst{22-19} = 0b0001;
2051 def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
2052 int_aarch64_neon_vsri> {
2053 let Inst{22-20} = 0b001;
2056 def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
2057 int_aarch64_neon_vsri> {
2058 let Inst{22-21} = 0b01;
2061 // 128-bit vector types
2062 def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
2063 int_aarch64_neon_vsri> {
2064 let Inst{22-19} = 0b0001;
2067 def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
2068 int_aarch64_neon_vsri> {
2069 let Inst{22-20} = 0b001;
2072 def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
2073 int_aarch64_neon_vsri> {
2074 let Inst{22-21} = 0b01;
2077 def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
2078 int_aarch64_neon_vsri> {
// Shift left and insert (sli): inserts shifted bits of Rn into Rd.
2084 defm SLIvvi : NeonI_N2VShLIns<0b1, 0b01010, "sli">;
// Shift right and insert (sri).
2087 defm SRIvvi : NeonI_N2VShRIns<0b1, 0b01000, "sri">;
2089 class N2VShR_Narrow<bit q, bit u, bits<5> opcode, string asmop, string DestT,
2090 string SrcT, Operand ImmTy>
2091 : NeonI_2VShiftImm<q, u, opcode,
2092 (outs VPR64:$Rd), (ins VPR128:$Rn, ImmTy:$Imm),
2093 asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
2096 class N2VShR_Narrow_Hi<bit q, bit u, bits<5> opcode, string asmop, string DestT,
2097 string SrcT, Operand ImmTy>
2098 : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
2099 (ins VPR128:$src, VPR128:$Rn, ImmTy:$Imm),
2100 asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
2102 let Constraints = "$src = $Rd";
2105 // left long shift by immediate
2106 multiclass NeonI_N2VShR_Narrow<bit u, bits<5> opcode, string asmop> {
2107 def _8B : N2VShR_Narrow<0b0, u, opcode, asmop, "8b", "8h", shr_imm8> {
2108 let Inst{22-19} = 0b0001;
2111 def _4H : N2VShR_Narrow<0b0, u, opcode, asmop, "4h", "4s", shr_imm16> {
2112 let Inst{22-20} = 0b001;
2115 def _2S : N2VShR_Narrow<0b0, u, opcode, asmop, "2s", "2d", shr_imm32> {
2116 let Inst{22-21} = 0b01;
2119 // Shift Narrow High
2120 def _16B : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "16b", "8h",
2122 let Inst{22-19} = 0b0001;
2125 def _8H : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "8h", "4s",
2127 let Inst{22-20} = 0b001;
2130 def _4S : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "4s", "2d",
2132 let Inst{22-21} = 0b01;
// Shift right narrow (shrn): shift each wide lane right and truncate to the
// narrow destination.
2137 defm SHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10000, "shrn">;
// Shift right narrow variants (prefix Q is saturating, prefix R is rounding).
// Note the space after ':' on QSHRUNvvi, matching the sibling defm lines.
2140 defm QSHRUNvvi : NeonI_N2VShR_Narrow<0b1, 0b10000, "sqshrun">;
2141 defm RSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10001, "rshrn">;
2142 defm QRSHRUNvvi : NeonI_N2VShR_Narrow<0b1, 0b10001, "sqrshrun">;
2143 defm SQSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10010, "sqshrn">;
2144 defm UQSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10010, "uqshrn">;
2145 defm SQRSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10011, "sqrshrn">;
2146 defm UQRSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10011, "uqrshrn">;
// PatFrags combining two 64-bit halves into one 128-bit vector via
// concat_vectors (low half first, then high half).
2148 def Neon_combine_2D : PatFrag<(ops node:$Rm, node:$Rn),
2149 (v2i64 (concat_vectors (v1i64 node:$Rm),
2150 (v1i64 node:$Rn)))>;
2151 def Neon_combine_8H : PatFrag<(ops node:$Rm, node:$Rn),
2152 (v8i16 (concat_vectors (v4i16 node:$Rm),
2153 (v4i16 node:$Rn)))>;
2154 def Neon_combine_4S : PatFrag<(ops node:$Rm, node:$Rn),
2155 (v4i32 (concat_vectors (v2i32 node:$Rm),
2156 (v2i32 node:$Rn)))>;
2157 def Neon_combine_4f : PatFrag<(ops node:$Rm, node:$Rn),
2158 (v4f32 (concat_vectors (v2f32 node:$Rm),
2159 (v2f32 node:$Rn)))>;
2160 def Neon_combine_2d : PatFrag<(ops node:$Rm, node:$Rn),
2161 (v2f64 (concat_vectors (v1f64 node:$Rm),
2162 (v1f64 node:$Rn)))>;
// PatFrags matching a vector srl/sra whose shift amount is an i32 immediate
// splatted across all lanes (Neon_vdup) — i.e. a uniform shift-by-immediate.
2164 def Neon_lshrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
2165 (v8i16 (srl (v8i16 node:$lhs),
2166 (v8i16 (Neon_vdup (i32 node:$rhs)))))>;
2167 def Neon_lshrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
2168 (v4i32 (srl (v4i32 node:$lhs),
2169 (v4i32 (Neon_vdup (i32 node:$rhs)))))>;
2170 def Neon_lshrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
2171 (v2i64 (srl (v2i64 node:$lhs),
2172 (v2i64 (Neon_vdup (i32 node:$rhs)))))>;
2173 def Neon_ashrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
2174 (v8i16 (sra (v8i16 node:$lhs),
2175 (v8i16 (Neon_vdup (i32 node:$rhs)))))>;
2176 def Neon_ashrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
2177 (v4i32 (sra (v4i32 node:$lhs),
2178 (v4i32 (Neon_vdup (i32 node:$rhs)))))>;
2179 def Neon_ashrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
2180 (v2i64 (sra (v2i64 node:$lhs),
2181 (v2i64 (Neon_vdup (i32 node:$rhs)))))>;
2183 // Normal shift right narrow is matched by IR (srl/sra, trunc, concat_vectors)
2184 multiclass Neon_shiftNarrow_patterns<string shr> {
2185 def : Pat<(v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H") VPR128:$Rn,
2186 (i32 shr_imm8:$Imm)))),
2187 (SHRNvvi_8B VPR128:$Rn, imm:$Imm)>;
2188 def : Pat<(v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S") VPR128:$Rn,
2189 (i32 shr_imm16:$Imm)))),
2190 (SHRNvvi_4H VPR128:$Rn, imm:$Imm)>;
2191 def : Pat<(v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D") VPR128:$Rn,
2192 (i32 shr_imm32:$Imm)))),
2193 (SHRNvvi_2S VPR128:$Rn, imm:$Imm)>;
2195 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
2196 (v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H")
2197 VPR128:$Rn, (i32 shr_imm8:$Imm))))))),
2198 (SHRNvvi_16B (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
2199 VPR128:$Rn, imm:$Imm)>;
2200 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
2201 (v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S")
2202 VPR128:$Rn, (i32 shr_imm16:$Imm))))))),
2203 (SHRNvvi_8H (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2204 VPR128:$Rn, imm:$Imm)>;
2205 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
2206 (v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D")
2207 VPR128:$Rn, (i32 shr_imm32:$Imm))))))),
2208 (SHRNvvi_4S (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2209 VPR128:$Rn, imm:$Imm)>;
2212 multiclass Neon_shiftNarrow_QR_patterns<SDPatternOperator op, string prefix> {
2213 def : Pat<(v8i8 (op (v8i16 VPR128:$Rn), shr_imm8:$Imm)),
2214 (!cast<Instruction>(prefix # "_8B") VPR128:$Rn, imm:$Imm)>;
2215 def : Pat<(v4i16 (op (v4i32 VPR128:$Rn), shr_imm16:$Imm)),
2216 (!cast<Instruction>(prefix # "_4H") VPR128:$Rn, imm:$Imm)>;
2217 def : Pat<(v2i32 (op (v2i64 VPR128:$Rn), shr_imm32:$Imm)),
2218 (!cast<Instruction>(prefix # "_2S") VPR128:$Rn, imm:$Imm)>;
2220 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
2221 (v1i64 (bitconvert (v8i8
2222 (op (v8i16 VPR128:$Rn), shr_imm8:$Imm))))),
2223 (!cast<Instruction>(prefix # "_16B")
2224 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2225 VPR128:$Rn, imm:$Imm)>;
2226 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
2227 (v1i64 (bitconvert (v4i16
2228 (op (v4i32 VPR128:$Rn), shr_imm16:$Imm))))),
2229 (!cast<Instruction>(prefix # "_8H")
2230 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2231 VPR128:$Rn, imm:$Imm)>;
2232 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
2233 (v1i64 (bitconvert (v2i32
2234 (op (v2i64 VPR128:$Rn), shr_imm32:$Imm))))),
2235 (!cast<Instruction>(prefix # "_4S")
2236 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2237 VPR128:$Rn, imm:$Imm)>;
// Instantiate the narrowing-shift patterns: plain shrn for srl/sra+trunc,
// and the saturating/rounding narrow intrinsics mapped onto their
// instruction prefixes defined above.
2240 defm : Neon_shiftNarrow_patterns<"lshr">;
2241 defm : Neon_shiftNarrow_patterns<"ashr">;
2243 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrun, "QSHRUNvvi">;
2244 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vrshrn, "RSHRNvvi">;
2245 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrun, "QRSHRUNvvi">;
2246 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrn, "SQSHRNvvi">;
2247 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqshrn, "UQSHRNvvi">;
2248 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrn, "SQRSHRNvvi">;
2249 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqrshrn, "UQRSHRNvvi">;
2251 // Convert fix-point and float-pointing
2252 class N2VCvt_Fx<bit q, bit u, bits<5> opcode, string asmop, string T,
2253 RegisterOperand VPRC, ValueType DestTy, ValueType SrcTy,
2254 Operand ImmTy, SDPatternOperator IntOp>
2255 : NeonI_2VShiftImm<q, u, opcode,
2256 (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
2257 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
2258 [(set (DestTy VPRC:$Rd), (DestTy (IntOp (SrcTy VPRC:$Rn),
2259 (i32 ImmTy:$Imm))))],
2262 multiclass NeonI_N2VCvt_Fx2fp<bit u, bits<5> opcode, string asmop,
2263 SDPatternOperator IntOp> {
2264 def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2f32, v2i32,
2266 let Inst{22-21} = 0b01;
2269 def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4f32, v4i32,
2271 let Inst{22-21} = 0b01;
2274 def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2f64, v2i64,
2280 multiclass NeonI_N2VCvt_Fp2fx<bit u, bits<5> opcode, string asmop,
2281 SDPatternOperator IntOp> {
2282 def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2i32, v2f32,
2284 let Inst{22-21} = 0b01;
2287 def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4i32, v4f32,
2289 let Inst{22-21} = 0b01;
2292 def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2i64, v2f64,
// Convert fixed-point to floating-point (scvtf/ucvtf with #fbits), matched
// via the shared ARM NEON conversion intrinsics.
2299 defm VCVTxs2f : NeonI_N2VCvt_Fx2fp<0, 0b11100, "scvtf",
2300 int_arm_neon_vcvtfxs2fp>;
2301 defm VCVTxu2f : NeonI_N2VCvt_Fx2fp<1, 0b11100, "ucvtf",
2302 int_arm_neon_vcvtfxu2fp>;
// Convert floating-point to fixed-point (fcvtzs/fcvtzu with #fbits).
2305 defm VCVTf2xs : NeonI_N2VCvt_Fp2fx<0, 0b11111, "fcvtzs",
2306 int_arm_neon_vcvtfp2fxs>;
2307 defm VCVTf2xu : NeonI_N2VCvt_Fp2fx<1, 0b11111, "fcvtzu",
2308 int_arm_neon_vcvtfp2fxu>;
2310 multiclass Neon_sshll2_0<SDNode ext>
2312 def _v8i8 : PatFrag<(ops node:$Rn),
2313 (v8i16 (ext (v8i8 (Neon_High16B node:$Rn))))>;
2314 def _v4i16 : PatFrag<(ops node:$Rn),
2315 (v4i32 (ext (v4i16 (Neon_High8H node:$Rn))))>;
2316 def _v2i32 : PatFrag<(ops node:$Rn),
2317 (v2i64 (ext (v2i32 (Neon_High4S node:$Rn))))>;
// PatFrags matching sign/zero extension of the high half of a 128-bit
// vector (NI_sext_high_v8i8 etc.), used by the "2" widening instructions.
2320 defm NI_sext_high : Neon_sshll2_0<sext>;
2321 defm NI_zext_high : Neon_sshll2_0<zext>;
2324 //===----------------------------------------------------------------------===//
2325 // Multiclasses for NeonI_Across
2326 //===----------------------------------------------------------------------===//
2330 multiclass NeonI_2VAcross_1<bit u, bits<5> opcode,
2331 string asmop, SDPatternOperator opnode>
2333 def _1h8b: NeonI_2VAcross<0b0, u, 0b00, opcode,
2334 (outs FPR16:$Rd), (ins VPR64:$Rn),
2335 asmop # "\t$Rd, $Rn.8b",
2336 [(set (v1i16 FPR16:$Rd),
2337 (v1i16 (opnode (v8i8 VPR64:$Rn))))],
2340 def _1h16b: NeonI_2VAcross<0b1, u, 0b00, opcode,
2341 (outs FPR16:$Rd), (ins VPR128:$Rn),
2342 asmop # "\t$Rd, $Rn.16b",
2343 [(set (v1i16 FPR16:$Rd),
2344 (v1i16 (opnode (v16i8 VPR128:$Rn))))],
2347 def _1s4h: NeonI_2VAcross<0b0, u, 0b01, opcode,
2348 (outs FPR32:$Rd), (ins VPR64:$Rn),
2349 asmop # "\t$Rd, $Rn.4h",
2350 [(set (v1i32 FPR32:$Rd),
2351 (v1i32 (opnode (v4i16 VPR64:$Rn))))],
2354 def _1s8h: NeonI_2VAcross<0b1, u, 0b01, opcode,
2355 (outs FPR32:$Rd), (ins VPR128:$Rn),
2356 asmop # "\t$Rd, $Rn.8h",
2357 [(set (v1i32 FPR32:$Rd),
2358 (v1i32 (opnode (v8i16 VPR128:$Rn))))],
2361 // _1d2s doesn't exist!
2363 def _1d4s: NeonI_2VAcross<0b1, u, 0b10, opcode,
2364 (outs FPR64:$Rd), (ins VPR128:$Rn),
2365 asmop # "\t$Rd, $Rn.4s",
2366 [(set (v1i64 FPR64:$Rd),
2367 (v1i64 (opnode (v4i32 VPR128:$Rn))))],
// Add across vector lanes with widened (long) scalar result.
2371 defm SADDLV : NeonI_2VAcross_1<0b0, 0b00011, "saddlv", int_aarch64_neon_saddlv>;
2372 defm UADDLV : NeonI_2VAcross_1<0b1, 0b00011, "uaddlv", int_aarch64_neon_uaddlv>;
2376 multiclass NeonI_2VAcross_2<bit u, bits<5> opcode,
2377 string asmop, SDPatternOperator opnode>
2379 def _1b8b: NeonI_2VAcross<0b0, u, 0b00, opcode,
2380 (outs FPR8:$Rd), (ins VPR64:$Rn),
2381 asmop # "\t$Rd, $Rn.8b",
2382 [(set (v1i8 FPR8:$Rd),
2383 (v1i8 (opnode (v8i8 VPR64:$Rn))))],
2386 def _1b16b: NeonI_2VAcross<0b1, u, 0b00, opcode,
2387 (outs FPR8:$Rd), (ins VPR128:$Rn),
2388 asmop # "\t$Rd, $Rn.16b",
2389 [(set (v1i8 FPR8:$Rd),
2390 (v1i8 (opnode (v16i8 VPR128:$Rn))))],
2393 def _1h4h: NeonI_2VAcross<0b0, u, 0b01, opcode,
2394 (outs FPR16:$Rd), (ins VPR64:$Rn),
2395 asmop # "\t$Rd, $Rn.4h",
2396 [(set (v1i16 FPR16:$Rd),
2397 (v1i16 (opnode (v4i16 VPR64:$Rn))))],
2400 def _1h8h: NeonI_2VAcross<0b1, u, 0b01, opcode,
2401 (outs FPR16:$Rd), (ins VPR128:$Rn),
2402 asmop # "\t$Rd, $Rn.8h",
2403 [(set (v1i16 FPR16:$Rd),
2404 (v1i16 (opnode (v8i16 VPR128:$Rn))))],
2407 // _1s2s doesn't exist!
2409 def _1s4s: NeonI_2VAcross<0b1, u, 0b10, opcode,
2410 (outs FPR32:$Rd), (ins VPR128:$Rn),
2411 asmop # "\t$Rd, $Rn.4s",
2412 [(set (v1i32 FPR32:$Rd),
2413 (v1i32 (opnode (v4i32 VPR128:$Rn))))],
// Integer max/min/add across vector lanes; result is a scalar of the
// element type.
2417 defm SMAXV : NeonI_2VAcross_2<0b0, 0b01010, "smaxv", int_aarch64_neon_smaxv>;
2418 defm UMAXV : NeonI_2VAcross_2<0b1, 0b01010, "umaxv", int_aarch64_neon_umaxv>;
2420 defm SMINV : NeonI_2VAcross_2<0b0, 0b11010, "sminv", int_aarch64_neon_sminv>;
2421 defm UMINV : NeonI_2VAcross_2<0b1, 0b11010, "uminv", int_aarch64_neon_uminv>;
2423 defm ADDV : NeonI_2VAcross_2<0b0, 0b11011, "addv", int_aarch64_neon_vaddv>;
2427 multiclass NeonI_2VAcross_3<bit u, bits<5> opcode, bits<2> size,
2428 string asmop, SDPatternOperator opnode> {
2429 def _1s4s: NeonI_2VAcross<0b1, u, size, opcode,
2430 (outs FPR32:$Rd), (ins VPR128:$Rn),
2431 asmop # "\t$Rd, $Rn.4s",
2432 [(set (f32 FPR32:$Rd),
2433 (f32 (opnode (v4f32 VPR128:$Rn))))],
// Floating-point max/min across the four lanes of a v4f32; size bits 0b00
// select the max form and 0b10 the min form, NM variants use the
// NaN-propagating maxNum/minNum semantics.
2437 defm FMAXNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b00, "fmaxnmv",
2438 int_aarch64_neon_vmaxnmv>;
2439 defm FMINNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b10, "fminnmv",
2440 int_aarch64_neon_vminnmv>;
2442 defm FMAXV : NeonI_2VAcross_3<0b1, 0b01111, 0b00, "fmaxv",
2443 int_aarch64_neon_vmaxv>;
2444 defm FMINV : NeonI_2VAcross_3<0b1, 0b01111, 0b10, "fminv",
2445 int_aarch64_neon_vminv>;
2447 // The followings are for instruction class (Perm)
2449 class NeonI_Permute<bit q, bits<2> size, bits<3> opcode,
2450 string asmop, RegisterOperand OpVPR, string OpS,
2451 SDPatternOperator opnode, ValueType Ty>
2452 : NeonI_Perm<q, size, opcode,
2453 (outs OpVPR:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2454 asmop # "\t$Rd." # OpS # ", $Rn." # OpS # ", $Rm." # OpS,
2455 [(set (Ty OpVPR:$Rd),
2456 (Ty (opnode (Ty OpVPR:$Rn), (Ty OpVPR:$Rm))))],
2459 multiclass NeonI_Perm_pat<bits<3> opcode, string asmop,
2460 SDPatternOperator opnode> {
2461 def _8b : NeonI_Permute<0b0, 0b00, opcode, asmop,
2462 VPR64, "8b", opnode, v8i8>;
2463 def _16b : NeonI_Permute<0b1, 0b00, opcode, asmop,
2464 VPR128, "16b",opnode, v16i8>;
2465 def _4h : NeonI_Permute<0b0, 0b01, opcode, asmop,
2466 VPR64, "4h", opnode, v4i16>;
2467 def _8h : NeonI_Permute<0b1, 0b01, opcode, asmop,
2468 VPR128, "8h", opnode, v8i16>;
2469 def _2s : NeonI_Permute<0b0, 0b10, opcode, asmop,
2470 VPR64, "2s", opnode, v2i32>;
2471 def _4s : NeonI_Permute<0b1, 0b10, opcode, asmop,
2472 VPR128, "4s", opnode, v4i32>;
2473 def _2d : NeonI_Permute<0b1, 0b11, opcode, asmop,
2474 VPR128, "2d", opnode, v2i64>;
// Permute instructions (unzip/transpose/zip, parts 1 and 2), each matched
// to its NEON_* permute DAG node for all integer vector arrangements.
2477 defm UZP1vvv : NeonI_Perm_pat<0b001, "uzp1", Neon_uzp1>;
2478 defm TRN1vvv : NeonI_Perm_pat<0b010, "trn1", Neon_trn1>;
2479 defm ZIP1vvv : NeonI_Perm_pat<0b011, "zip1", Neon_zip1>;
2480 defm UZP2vvv : NeonI_Perm_pat<0b101, "uzp2", Neon_uzp2>;
2481 defm TRN2vvv : NeonI_Perm_pat<0b110, "trn2", Neon_trn2>;
2482 defm ZIP2vvv : NeonI_Perm_pat<0b111, "zip2", Neon_zip2>;
2484 multiclass NeonI_Perm_float_pat<string INS, SDPatternOperator opnode> {
2485 def : Pat<(v2f32 (opnode (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))),
2486 (!cast<Instruction>(INS # "_2s") VPR64:$Rn, VPR64:$Rm)>;
2488 def : Pat<(v4f32 (opnode (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))),
2489 (!cast<Instruction>(INS # "_4s") VPR128:$Rn, VPR128:$Rm)>;
2491 def : Pat<(v2f64 (opnode (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))),
2492 (!cast<Instruction>(INS # "_2d") VPR128:$Rn, VPR128:$Rm)>;
// Reuse the integer permute instructions for floating-point vector types
// (v2f32/v4f32/v2f64) via extra selection patterns.
2495 defm : NeonI_Perm_float_pat<"UZP1vvv", Neon_uzp1>;
2496 defm : NeonI_Perm_float_pat<"UZP2vvv", Neon_uzp2>;
2497 defm : NeonI_Perm_float_pat<"ZIP1vvv", Neon_zip1>;
2498 defm : NeonI_Perm_float_pat<"ZIP2vvv", Neon_zip2>;
2499 defm : NeonI_Perm_float_pat<"TRN1vvv", Neon_trn1>;
2500 defm : NeonI_Perm_float_pat<"TRN2vvv", Neon_trn2>;
2502 // The followings are for instruction class (3V Diff)
2504 // normal long/long2 pattern
2505 class NeonI_3VDL<bit q, bit u, bits<2> size, bits<4> opcode,
2506 string asmop, string ResS, string OpS,
2507 SDPatternOperator opnode, SDPatternOperator ext,
2508 RegisterOperand OpVPR,
2509 ValueType ResTy, ValueType OpTy>
2510 : NeonI_3VDiff<q, u, size, opcode,
2511 (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2512 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2513 [(set (ResTy VPR128:$Rd),
2514 (ResTy (opnode (ResTy (ext (OpTy OpVPR:$Rn))),
2515 (ResTy (ext (OpTy OpVPR:$Rm))))))],
2518 multiclass NeonI_3VDL_s<bit u, bits<4> opcode,
2519 string asmop, SDPatternOperator opnode,
2520 bit Commutable = 0> {
2521 let isCommutable = Commutable in {
2522 def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2523 opnode, sext, VPR64, v8i16, v8i8>;
2524 def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2525 opnode, sext, VPR64, v4i32, v4i16>;
2526 def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2527 opnode, sext, VPR64, v2i64, v2i32>;
2531 multiclass NeonI_3VDL2_s<bit u, bits<4> opcode, string asmop,
2532 SDPatternOperator opnode, bit Commutable = 0> {
2533 let isCommutable = Commutable in {
2534 def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2535 opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>;
2536 def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2537 opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>;
2538 def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2539 opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>;
2543 multiclass NeonI_3VDL_u<bit u, bits<4> opcode, string asmop,
2544 SDPatternOperator opnode, bit Commutable = 0> {
2545 let isCommutable = Commutable in {
2546 def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2547 opnode, zext, VPR64, v8i16, v8i8>;
2548 def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2549 opnode, zext, VPR64, v4i32, v4i16>;
2550 def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2551 opnode, zext, VPR64, v2i64, v2i32>;
2555 multiclass NeonI_3VDL2_u<bit u, bits<4> opcode, string asmop,
2556 SDPatternOperator opnode, bit Commutable = 0> {
2557 let isCommutable = Commutable in {
2558 def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2559 opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>;
2560 def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2561 opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>;
2562 def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2563 opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>;
// Long add/subtract (saddl/uaddl/ssubl/usubl) and their high-half "2"
// forms. Adds are marked commutable (trailing 1); subtracts are not.
2567 defm SADDLvvv : NeonI_3VDL_s<0b0, 0b0000, "saddl", add, 1>;
2568 defm UADDLvvv : NeonI_3VDL_u<0b1, 0b0000, "uaddl", add, 1>;
2570 defm SADDL2vvv : NeonI_3VDL2_s<0b0, 0b0000, "saddl2", add, 1>;
2571 defm UADDL2vvv : NeonI_3VDL2_u<0b1, 0b0000, "uaddl2", add, 1>;
2573 defm SSUBLvvv : NeonI_3VDL_s<0b0, 0b0010, "ssubl", sub, 0>;
2574 defm USUBLvvv : NeonI_3VDL_u<0b1, 0b0010, "usubl", sub, 0>;
2576 defm SSUBL2vvv : NeonI_3VDL2_s<0b0, 0b0010, "ssubl2", sub, 0>;
2577 defm USUBL2vvv : NeonI_3VDL2_u<0b1, 0b0010, "usubl2", sub, 0>;
2579 // normal wide/wide2 pattern
2580 class NeonI_3VDW<bit q, bit u, bits<2> size, bits<4> opcode,
2581 string asmop, string ResS, string OpS,
2582 SDPatternOperator opnode, SDPatternOperator ext,
2583 RegisterOperand OpVPR,
2584 ValueType ResTy, ValueType OpTy>
2585 : NeonI_3VDiff<q, u, size, opcode,
2586 (outs VPR128:$Rd), (ins VPR128:$Rn, OpVPR:$Rm),
2587 asmop # "\t$Rd." # ResS # ", $Rn." # ResS # ", $Rm." # OpS,
2588 [(set (ResTy VPR128:$Rd),
2589 (ResTy (opnode (ResTy VPR128:$Rn),
2590 (ResTy (ext (OpTy OpVPR:$Rm))))))],
2593 multiclass NeonI_3VDW_s<bit u, bits<4> opcode, string asmop,
2594 SDPatternOperator opnode> {
2595 def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2596 opnode, sext, VPR64, v8i16, v8i8>;
2597 def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2598 opnode, sext, VPR64, v4i32, v4i16>;
2599 def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2600 opnode, sext, VPR64, v2i64, v2i32>;
// Signed wide add/subtract: wide Rn combined with a sign-extended Rm.
2603 defm SADDWvvv : NeonI_3VDW_s<0b0, 0b0001, "saddw", add>;
2604 defm SSUBWvvv : NeonI_3VDW_s<0b0, 0b0011, "ssubw", sub>;
2606 multiclass NeonI_3VDW2_s<bit u, bits<4> opcode, string asmop,
2607 SDPatternOperator opnode> {
2608 def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2609 opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>;
2610 def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2611 opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>;
2612 def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2613 opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>;
2616 defm SADDW2vvv : NeonI_3VDW2_s<0b0, 0b0001, "saddw2", add>;
2617 defm SSUBW2vvv : NeonI_3VDW2_s<0b0, 0b0011, "ssubw2", sub>;
2619 multiclass NeonI_3VDW_u<bit u, bits<4> opcode, string asmop,
2620 SDPatternOperator opnode> {
2621 def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2622 opnode, zext, VPR64, v8i16, v8i8>;
2623 def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2624 opnode, zext, VPR64, v4i32, v4i16>;
2625 def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2626 opnode, zext, VPR64, v2i64, v2i32>;
2629 defm UADDWvvv : NeonI_3VDW_u<0b1, 0b0001, "uaddw", add>;
2630 defm USUBWvvv : NeonI_3VDW_u<0b1, 0b0011, "usubw", sub>;
2632 multiclass NeonI_3VDW2_u<bit u, bits<4> opcode, string asmop,
2633 SDPatternOperator opnode> {
2634 def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2635 opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>;
2636 def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2637 opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>;
2638 def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2639 opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>;
2642 defm UADDW2vvv : NeonI_3VDW2_u<0b1, 0b0001, "uaddw2", add>;
2643 defm USUBW2vvv : NeonI_3VDW2_u<0b1, 0b0011, "usubw2", sub>;
2645 // Get the high half part of the vector element.
2646 multiclass NeonI_get_high {
2647 def _8h : PatFrag<(ops node:$Rn),
2648 (v8i8 (trunc (v8i16 (srl (v8i16 node:$Rn),
2649 (v8i16 (Neon_vdup (i32 8)))))))>;
2650 def _4s : PatFrag<(ops node:$Rn),
2651 (v4i16 (trunc (v4i32 (srl (v4i32 node:$Rn),
2652 (v4i32 (Neon_vdup (i32 16)))))))>;
2653 def _2d : PatFrag<(ops node:$Rn),
2654 (v2i32 (trunc (v2i64 (srl (v2i64 node:$Rn),
2655 (v2i64 (Neon_vdup (i32 32)))))))>;
2658 defm NI_get_hi : NeonI_get_high;
2660 // pattern for addhn/subhn with 2 operands
2661 class NeonI_3VDN_addhn_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
2662 string asmop, string ResS, string OpS,
2663 SDPatternOperator opnode, SDPatternOperator get_hi,
2664 ValueType ResTy, ValueType OpTy>
2665 : NeonI_3VDiff<q, u, size, opcode,
2666 (outs VPR64:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
2667 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2668 [(set (ResTy VPR64:$Rd),
2670 (OpTy (opnode (OpTy VPR128:$Rn),
2671 (OpTy VPR128:$Rm))))))],
2674 multiclass NeonI_3VDN_addhn_2Op<bit u, bits<4> opcode, string asmop,
2675 SDPatternOperator opnode, bit Commutable = 0> {
2676 let isCommutable = Commutable in {
2677 def _8b8h : NeonI_3VDN_addhn_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h",
2678 opnode, NI_get_hi_8h, v8i8, v8i16>;
2679 def _4h4s : NeonI_3VDN_addhn_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s",
2680 opnode, NI_get_hi_4s, v4i16, v4i32>;
2681 def _2s2d : NeonI_3VDN_addhn_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d",
2682 opnode, NI_get_hi_2d, v2i32, v2i64>;
2686 defm ADDHNvvv : NeonI_3VDN_addhn_2Op<0b0, 0b0100, "addhn", add, 1>;
2687 defm SUBHNvvv : NeonI_3VDN_addhn_2Op<0b0, 0b0110, "subhn", sub, 0>;
2689 // pattern for operation with 2 operands
2690 class NeonI_3VD_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
2691 string asmop, string ResS, string OpS,
2692 SDPatternOperator opnode,
2693 RegisterOperand ResVPR, RegisterOperand OpVPR,
2694 ValueType ResTy, ValueType OpTy>
2695 : NeonI_3VDiff<q, u, size, opcode,
2696 (outs ResVPR:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2697 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2698 [(set (ResTy ResVPR:$Rd),
2699 (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))],
2702 // normal narrow pattern
2703 multiclass NeonI_3VDN_2Op<bit u, bits<4> opcode, string asmop,
2704 SDPatternOperator opnode, bit Commutable = 0> {
2705 let isCommutable = Commutable in {
2706 def _8b8h : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h",
2707 opnode, VPR64, VPR128, v8i8, v8i16>;
2708 def _4h4s : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s",
2709 opnode, VPR64, VPR128, v4i16, v4i32>;
2710 def _2s2d : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d",
2711 opnode, VPR64, VPR128, v2i32, v2i64>;
2715 defm RADDHNvvv : NeonI_3VDN_2Op<0b1, 0b0100, "raddhn", int_arm_neon_vraddhn, 1>;
2716 defm RSUBHNvvv : NeonI_3VDN_2Op<0b1, 0b0110, "rsubhn", int_arm_neon_vrsubhn, 0>;
2718 // pattern for acle intrinsic with 3 operands
2719 class NeonI_3VDN_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
2720 string asmop, string ResS, string OpS>
2721 : NeonI_3VDiff<q, u, size, opcode,
2722 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn, VPR128:$Rm),
2723 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2725 let Constraints = "$src = $Rd";
2726 let neverHasSideEffects = 1;
2729 multiclass NeonI_3VDN_3Op_v1<bit u, bits<4> opcode, string asmop> {
2730 def _16b8h : NeonI_3VDN_3Op<0b1, u, 0b00, opcode, asmop, "16b", "8h">;
2731 def _8h4s : NeonI_3VDN_3Op<0b1, u, 0b01, opcode, asmop, "8h", "4s">;
2732 def _4s2d : NeonI_3VDN_3Op<0b1, u, 0b10, opcode, asmop, "4s", "2d">;
2735 defm ADDHN2vvv : NeonI_3VDN_3Op_v1<0b0, 0b0100, "addhn2">;
2736 defm SUBHN2vvv : NeonI_3VDN_3Op_v1<0b0, 0b0110, "subhn2">;
2738 defm RADDHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0100, "raddhn2">;
2739 defm RSUBHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0110, "rsubhn2">;
2741 // Patterns have to be separate because there's a SUBREG_TO_REG in the output
2743 class NarrowHighHalfPat<Instruction INST, ValueType DstTy, ValueType SrcTy,
2744 SDPatternOperator coreop>
2745 : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
2746 (v1i64 (bitconvert (DstTy (coreop (SrcTy VPR128:$Rn),
2747 (SrcTy VPR128:$Rm)))))),
2748 (INST (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2749 VPR128:$Rn, VPR128:$Rm)>;
2752 def : NarrowHighHalfPat<ADDHN2vvv_16b8h, v8i8, v8i16,
2753 BinOpFrag<(NI_get_hi_8h (add node:$LHS, node:$RHS))>>;
2754 def : NarrowHighHalfPat<ADDHN2vvv_8h4s, v4i16, v4i32,
2755 BinOpFrag<(NI_get_hi_4s (add node:$LHS, node:$RHS))>>;
2756 def : NarrowHighHalfPat<ADDHN2vvv_4s2d, v2i32, v2i64,
2757 BinOpFrag<(NI_get_hi_2d (add node:$LHS, node:$RHS))>>;
2760 def : NarrowHighHalfPat<SUBHN2vvv_16b8h, v8i8, v8i16,
2761 BinOpFrag<(NI_get_hi_8h (sub node:$LHS, node:$RHS))>>;
2762 def : NarrowHighHalfPat<SUBHN2vvv_8h4s, v4i16, v4i32,
2763 BinOpFrag<(NI_get_hi_4s (sub node:$LHS, node:$RHS))>>;
2764 def : NarrowHighHalfPat<SUBHN2vvv_4s2d, v2i32, v2i64,
2765 BinOpFrag<(NI_get_hi_2d (sub node:$LHS, node:$RHS))>>;
2768 def : NarrowHighHalfPat<RADDHN2vvv_16b8h, v8i8, v8i16, int_arm_neon_vraddhn>;
2769 def : NarrowHighHalfPat<RADDHN2vvv_8h4s, v4i16, v4i32, int_arm_neon_vraddhn>;
2770 def : NarrowHighHalfPat<RADDHN2vvv_4s2d, v2i32, v2i64, int_arm_neon_vraddhn>;
2773 def : NarrowHighHalfPat<RSUBHN2vvv_16b8h, v8i8, v8i16, int_arm_neon_vrsubhn>;
2774 def : NarrowHighHalfPat<RSUBHN2vvv_8h4s, v4i16, v4i32, int_arm_neon_vrsubhn>;
2775 def : NarrowHighHalfPat<RSUBHN2vvv_4s2d, v2i32, v2i64, int_arm_neon_vrsubhn>;
2777 // pattern that need to extend result
2778 class NeonI_3VDL_Ext<bit q, bit u, bits<2> size, bits<4> opcode,
2779 string asmop, string ResS, string OpS,
2780 SDPatternOperator opnode,
2781 RegisterOperand OpVPR,
2782 ValueType ResTy, ValueType OpTy, ValueType OpSTy>
2783 : NeonI_3VDiff<q, u, size, opcode,
2784 (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2785 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2786 [(set (ResTy VPR128:$Rd),
2787 (ResTy (zext (OpSTy (opnode (OpTy OpVPR:$Rn),
2788 (OpTy OpVPR:$Rm))))))],
2791 multiclass NeonI_3VDL_zext<bit u, bits<4> opcode, string asmop,
2792 SDPatternOperator opnode, bit Commutable = 0> {
2793 let isCommutable = Commutable in {
2794 def _8h8b : NeonI_3VDL_Ext<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2795 opnode, VPR64, v8i16, v8i8, v8i8>;
2796 def _4s4h : NeonI_3VDL_Ext<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2797 opnode, VPR64, v4i32, v4i16, v4i16>;
2798 def _2d2s : NeonI_3VDL_Ext<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2799 opnode, VPR64, v2i64, v2i32, v2i32>;
2803 defm SABDLvvv : NeonI_3VDL_zext<0b0, 0b0111, "sabdl", int_arm_neon_vabds, 1>;
2804 defm UABDLvvv : NeonI_3VDL_zext<0b1, 0b0111, "uabdl", int_arm_neon_vabdu, 1>;
2806 multiclass NeonI_Op_High<SDPatternOperator op> {
2807 def _16B : PatFrag<(ops node:$Rn, node:$Rm),
2808 (op (v8i8 (Neon_High16B node:$Rn)),
2809 (v8i8 (Neon_High16B node:$Rm)))>;
2810 def _8H : PatFrag<(ops node:$Rn, node:$Rm),
2811 (op (v4i16 (Neon_High8H node:$Rn)),
2812 (v4i16 (Neon_High8H node:$Rm)))>;
2813 def _4S : PatFrag<(ops node:$Rn, node:$Rm),
2814 (op (v2i32 (Neon_High4S node:$Rn)),
2815 (v2i32 (Neon_High4S node:$Rm)))>;
2818 defm NI_sabdl_hi : NeonI_Op_High<int_arm_neon_vabds>;
2819 defm NI_uabdl_hi : NeonI_Op_High<int_arm_neon_vabdu>;
2820 defm NI_smull_hi : NeonI_Op_High<int_arm_neon_vmulls>;
2821 defm NI_umull_hi : NeonI_Op_High<int_arm_neon_vmullu>;
2822 defm NI_qdmull_hi : NeonI_Op_High<int_arm_neon_vqdmull>;
2823 defm NI_pmull_hi : NeonI_Op_High<int_arm_neon_vmullp>;
2825 multiclass NeonI_3VDL_Abd_u<bit u, bits<4> opcode, string asmop, string opnode,
2826 bit Commutable = 0> {
2827 let isCommutable = Commutable in {
2828 def _8h8b : NeonI_3VDL_Ext<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2829 !cast<PatFrag>(opnode # "_16B"),
2830 VPR128, v8i16, v16i8, v8i8>;
2831 def _4s4h : NeonI_3VDL_Ext<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2832 !cast<PatFrag>(opnode # "_8H"),
2833 VPR128, v4i32, v8i16, v4i16>;
2834 def _2d2s : NeonI_3VDL_Ext<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2835 !cast<PatFrag>(opnode # "_4S"),
2836 VPR128, v2i64, v4i32, v2i32>;
2840 defm SABDL2vvv : NeonI_3VDL_Abd_u<0b0, 0b0111, "sabdl2", "NI_sabdl_hi", 1>;
2841 defm UABDL2vvv : NeonI_3VDL_Abd_u<0b1, 0b0111, "uabdl2", "NI_uabdl_hi", 1>;
2843 // For pattern that need two operators being chained.
2844 class NeonI_3VDL_Aba<bit q, bit u, bits<2> size, bits<4> opcode,
2845 string asmop, string ResS, string OpS,
2846 SDPatternOperator opnode, SDPatternOperator subop,
2847 RegisterOperand OpVPR,
2848 ValueType ResTy, ValueType OpTy, ValueType OpSTy>
2849 : NeonI_3VDiff<q, u, size, opcode,
2850 (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
2851 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2852 [(set (ResTy VPR128:$Rd),
2854 (ResTy VPR128:$src),
2855 (ResTy (zext (OpSTy (subop (OpTy OpVPR:$Rn),
2856 (OpTy OpVPR:$Rm))))))))],
2858 let Constraints = "$src = $Rd";
2861 multiclass NeonI_3VDL_Aba_v1<bit u, bits<4> opcode, string asmop,
2862 SDPatternOperator opnode, SDPatternOperator subop>{
2863 def _8h8b : NeonI_3VDL_Aba<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2864 opnode, subop, VPR64, v8i16, v8i8, v8i8>;
2865 def _4s4h : NeonI_3VDL_Aba<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2866 opnode, subop, VPR64, v4i32, v4i16, v4i16>;
2867 def _2d2s : NeonI_3VDL_Aba<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2868 opnode, subop, VPR64, v2i64, v2i32, v2i32>;
2871 defm SABALvvv : NeonI_3VDL_Aba_v1<0b0, 0b0101, "sabal",
2872 add, int_arm_neon_vabds>;
2873 defm UABALvvv : NeonI_3VDL_Aba_v1<0b1, 0b0101, "uabal",
2874 add, int_arm_neon_vabdu>;
2876 multiclass NeonI_3VDL2_Aba_v1<bit u, bits<4> opcode, string asmop,
2877 SDPatternOperator opnode, string subop> {
2878 def _8h8b : NeonI_3VDL_Aba<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2879 opnode, !cast<PatFrag>(subop # "_16B"),
2880 VPR128, v8i16, v16i8, v8i8>;
2881 def _4s4h : NeonI_3VDL_Aba<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2882 opnode, !cast<PatFrag>(subop # "_8H"),
2883 VPR128, v4i32, v8i16, v4i16>;
2884 def _2d2s : NeonI_3VDL_Aba<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2885 opnode, !cast<PatFrag>(subop # "_4S"),
2886 VPR128, v2i64, v4i32, v2i32>;
2889 defm SABAL2vvv : NeonI_3VDL2_Aba_v1<0b0, 0b0101, "sabal2", add,
2891 defm UABAL2vvv : NeonI_3VDL2_Aba_v1<0b1, 0b0101, "uabal2", add,
2894 // Long pattern with 2 operands
2895 multiclass NeonI_3VDL_2Op<bit u, bits<4> opcode, string asmop,
2896 SDPatternOperator opnode, bit Commutable = 0> {
2897 let isCommutable = Commutable in {
2898 def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2899 opnode, VPR128, VPR64, v8i16, v8i8>;
2900 def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2901 opnode, VPR128, VPR64, v4i32, v4i16>;
2902 def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2903 opnode, VPR128, VPR64, v2i64, v2i32>;
2907 defm SMULLvvv : NeonI_3VDL_2Op<0b0, 0b1100, "smull", int_arm_neon_vmulls, 1>;
2908 defm UMULLvvv : NeonI_3VDL_2Op<0b1, 0b1100, "umull", int_arm_neon_vmullu, 1>;
2910 class NeonI_3VDL2_2Op_mull<bit q, bit u, bits<2> size, bits<4> opcode,
2911 string asmop, string ResS, string OpS,
2912 SDPatternOperator opnode,
2913 ValueType ResTy, ValueType OpTy>
2914 : NeonI_3VDiff<q, u, size, opcode,
2915 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
2916 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2917 [(set (ResTy VPR128:$Rd),
2918 (ResTy (opnode (OpTy VPR128:$Rn), (OpTy VPR128:$Rm))))],
2921 multiclass NeonI_3VDL2_2Op_mull_v1<bit u, bits<4> opcode, string asmop,
2922 string opnode, bit Commutable = 0> {
2923 let isCommutable = Commutable in {
2924 def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2925 !cast<PatFrag>(opnode # "_16B"),
2927 def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2928 !cast<PatFrag>(opnode # "_8H"),
2930 def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2931 !cast<PatFrag>(opnode # "_4S"),
2936 defm SMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b0, 0b1100, "smull2",
2938 defm UMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b1, 0b1100, "umull2",
2941 // Long pattern with 3 operands
2942 class NeonI_3VDL_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
2943 string asmop, string ResS, string OpS,
2944 SDPatternOperator opnode,
2945 ValueType ResTy, ValueType OpTy>
2946 : NeonI_3VDiff<q, u, size, opcode,
2947 (outs VPR128:$Rd), (ins VPR128:$src, VPR64:$Rn, VPR64:$Rm),
2948 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2949 [(set (ResTy VPR128:$Rd),
2951 (ResTy VPR128:$src),
2952 (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))))],
2954 let Constraints = "$src = $Rd";
2957 multiclass NeonI_3VDL_3Op_v1<bit u, bits<4> opcode, string asmop,
2958 SDPatternOperator opnode> {
2959 def _8h8b : NeonI_3VDL_3Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2960 opnode, v8i16, v8i8>;
2961 def _4s4h : NeonI_3VDL_3Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2962 opnode, v4i32, v4i16>;
2963 def _2d2s : NeonI_3VDL_3Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2964 opnode, v2i64, v2i32>;
2967 def Neon_smlal : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
2969 (int_arm_neon_vmulls node:$Rn, node:$Rm))>;
2971 def Neon_umlal : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
2973 (int_arm_neon_vmullu node:$Rn, node:$Rm))>;
2975 def Neon_smlsl : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
2977 (int_arm_neon_vmulls node:$Rn, node:$Rm))>;
2979 def Neon_umlsl : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
2981 (int_arm_neon_vmullu node:$Rn, node:$Rm))>;
2983 defm SMLALvvv : NeonI_3VDL_3Op_v1<0b0, 0b1000, "smlal", Neon_smlal>;
2984 defm UMLALvvv : NeonI_3VDL_3Op_v1<0b1, 0b1000, "umlal", Neon_umlal>;
2986 defm SMLSLvvv : NeonI_3VDL_3Op_v1<0b0, 0b1010, "smlsl", Neon_smlsl>;
2987 defm UMLSLvvv : NeonI_3VDL_3Op_v1<0b1, 0b1010, "umlsl", Neon_umlsl>;
2989 class NeonI_3VDL2_3Op_mlas<bit q, bit u, bits<2> size, bits<4> opcode,
2990 string asmop, string ResS, string OpS,
2991 SDPatternOperator subop, SDPatternOperator opnode,
2992 RegisterOperand OpVPR,
2993 ValueType ResTy, ValueType OpTy>
2994 : NeonI_3VDiff<q, u, size, opcode,
2995 (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
2996 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2997 [(set (ResTy VPR128:$Rd),
2999 (ResTy VPR128:$src),
3000 (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))))],
3002 let Constraints = "$src = $Rd";
3005 multiclass NeonI_3VDL2_3Op_mlas_v1<bit u, bits<4> opcode, string asmop,
3006 SDPatternOperator subop, string opnode> {
3007 def _8h16b : NeonI_3VDL2_3Op_mlas<0b1, u, 0b00, opcode, asmop, "8h", "16b",
3008 subop, !cast<PatFrag>(opnode # "_16B"),
3009 VPR128, v8i16, v16i8>;
3010 def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h",
3011 subop, !cast<PatFrag>(opnode # "_8H"),
3012 VPR128, v4i32, v8i16>;
3013 def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s",
3014 subop, !cast<PatFrag>(opnode # "_4S"),
3015 VPR128, v2i64, v4i32>;
3018 defm SMLAL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1000, "smlal2",
3019 add, "NI_smull_hi">;
3020 defm UMLAL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1000, "umlal2",
3021 add, "NI_umull_hi">;
3023 defm SMLSL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1010, "smlsl2",
3024 sub, "NI_smull_hi">;
3025 defm UMLSL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1010, "umlsl2",
3026 sub, "NI_umull_hi">;
3028 multiclass NeonI_3VDL_qdmlal_3Op_v2<bit u, bits<4> opcode, string asmop,
3029 SDPatternOperator opnode> {
3030 def _4s4h : NeonI_3VDL2_3Op_mlas<0b0, u, 0b01, opcode, asmop, "4s", "4h",
3031 opnode, int_arm_neon_vqdmull,
3032 VPR64, v4i32, v4i16>;
3033 def _2d2s : NeonI_3VDL2_3Op_mlas<0b0, u, 0b10, opcode, asmop, "2d", "2s",
3034 opnode, int_arm_neon_vqdmull,
3035 VPR64, v2i64, v2i32>;
3038 defm SQDMLALvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1001, "sqdmlal",
3039 int_arm_neon_vqadds>;
3040 defm SQDMLSLvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1011, "sqdmlsl",
3041 int_arm_neon_vqsubs>;
3043 multiclass NeonI_3VDL_v2<bit u, bits<4> opcode, string asmop,
3044 SDPatternOperator opnode, bit Commutable = 0> {
3045 let isCommutable = Commutable in {
3046 def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
3047 opnode, VPR128, VPR64, v4i32, v4i16>;
3048 def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
3049 opnode, VPR128, VPR64, v2i64, v2i32>;
3053 defm SQDMULLvvv : NeonI_3VDL_v2<0b0, 0b1101, "sqdmull",
3054 int_arm_neon_vqdmull, 1>;
3056 multiclass NeonI_3VDL2_2Op_mull_v2<bit u, bits<4> opcode, string asmop,
3057 string opnode, bit Commutable = 0> {
3058 let isCommutable = Commutable in {
3059 def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h",
3060 !cast<PatFrag>(opnode # "_8H"),
3062 def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s",
3063 !cast<PatFrag>(opnode # "_4S"),
3068 defm SQDMULL2vvv : NeonI_3VDL2_2Op_mull_v2<0b0, 0b1101, "sqdmull2",
3071 multiclass NeonI_3VDL2_3Op_qdmlal_v2<bit u, bits<4> opcode, string asmop,
3072 SDPatternOperator opnode> {
3073 def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h",
3074 opnode, NI_qdmull_hi_8H,
3075 VPR128, v4i32, v8i16>;
3076 def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s",
3077 opnode, NI_qdmull_hi_4S,
3078 VPR128, v2i64, v4i32>;
3081 defm SQDMLAL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1001, "sqdmlal2",
3082 int_arm_neon_vqadds>;
3083 defm SQDMLSL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1011, "sqdmlsl2",
3084 int_arm_neon_vqsubs>;
3086 multiclass NeonI_3VDL_v3<bit u, bits<4> opcode, string asmop,
3087 SDPatternOperator opnode_8h8b,
3088 SDPatternOperator opnode_1q1d, bit Commutable = 0> {
3089 let isCommutable = Commutable in {
3090 def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
3091 opnode_8h8b, VPR128, VPR64, v8i16, v8i8>;
3093 def _1q1d : NeonI_3VD_2Op<0b0, u, 0b11, opcode, asmop, "1q", "1d",
3094 opnode_1q1d, VPR128, VPR64, v16i8, v1i64>;
3098 defm PMULLvvv : NeonI_3VDL_v3<0b0, 0b1110, "pmull", int_arm_neon_vmullp,
3099 int_aarch64_neon_vmull_p64, 1>;
3101 multiclass NeonI_3VDL2_2Op_mull_v3<bit u, bits<4> opcode, string asmop,
3102 string opnode, bit Commutable = 0> {
3103 let isCommutable = Commutable in {
3104 def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b",
3105 !cast<PatFrag>(opnode # "_16B"),
3109 NeonI_3VDiff<0b1, u, 0b11, opcode,
3110 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
3111 asmop # "\t$Rd.1q, $Rn.2d, $Rm.2d",
3112 [(set (v16i8 VPR128:$Rd),
3113 (v16i8 (int_aarch64_neon_vmull_p64
3114 (v1i64 (scalar_to_vector
3115 (i64 (vector_extract (v2i64 VPR128:$Rn), 1)))),
3116 (v1i64 (scalar_to_vector
3117 (i64 (vector_extract (v2i64 VPR128:$Rm), 1)))))))],
3122 defm PMULL2vvv : NeonI_3VDL2_2Op_mull_v3<0b0, 0b1110, "pmull2", "NI_pmull_hi",
3125 // End of implementation for instruction class (3V Diff)
3127 // The following are the vector load/store multiple N-element structure
3128 // (class SIMD lselem).
3130 // ld1: load multiple 1-element structure to 1/2/3/4 registers.
3131 // ld2/ld3/ld4: load multiple N-element structure to N registers (N = 2, 3, 4).
3132 // The structure consists of a sequence of sets of N values.
3133 // The first element of the structure is placed in the first lane
3134 // of the first vector, the second element in the first lane
3135 // of the second vector, and so on.
3136 // E.g. LD1_3V_2S will load 32-bit elements {A, B, C, D, E, F} sequentially into
3137 // the three 64-bit vectors list {BA, DC, FE}.
3138 // E.g. LD3_2S will load 32-bit elements {A, B, C, D, E, F} into the three
3139 // 64-bit vectors list {DA, EB, FC}.
3140 // Store instructions store multiple structure to N registers like load.
3143 class NeonI_LDVList<bit q, bits<4> opcode, bits<2> size,
3144 RegisterOperand VecList, string asmop>
3145 : NeonI_LdStMult<q, 1, opcode, size,
3146 (outs VecList:$Rt), (ins GPR64xsp:$Rn),
3147 asmop # "\t$Rt, [$Rn]",
3151 let neverHasSideEffects = 1;
3154 multiclass LDVList_BHSD<bits<4> opcode, string List, string asmop> {
3155 def _8B : NeonI_LDVList<0, opcode, 0b00,
3156 !cast<RegisterOperand>(List # "8B_operand"), asmop>;
3158 def _4H : NeonI_LDVList<0, opcode, 0b01,
3159 !cast<RegisterOperand>(List # "4H_operand"), asmop>;
3161 def _2S : NeonI_LDVList<0, opcode, 0b10,
3162 !cast<RegisterOperand>(List # "2S_operand"), asmop>;
3164 def _16B : NeonI_LDVList<1, opcode, 0b00,
3165 !cast<RegisterOperand>(List # "16B_operand"), asmop>;
3167 def _8H : NeonI_LDVList<1, opcode, 0b01,
3168 !cast<RegisterOperand>(List # "8H_operand"), asmop>;
3170 def _4S : NeonI_LDVList<1, opcode, 0b10,
3171 !cast<RegisterOperand>(List # "4S_operand"), asmop>;
3173 def _2D : NeonI_LDVList<1, opcode, 0b11,
3174 !cast<RegisterOperand>(List # "2D_operand"), asmop>;
3177 // Load multiple N-element structure to N consecutive registers (N = 1,2,3,4)
3178 defm LD1 : LDVList_BHSD<0b0111, "VOne", "ld1">;
3179 def LD1_1D : NeonI_LDVList<0, 0b0111, 0b11, VOne1D_operand, "ld1">;
3181 defm LD2 : LDVList_BHSD<0b1000, "VPair", "ld2">;
3183 defm LD3 : LDVList_BHSD<0b0100, "VTriple", "ld3">;
3185 defm LD4 : LDVList_BHSD<0b0000, "VQuad", "ld4">;
3187 // Load multiple 1-element structure to N consecutive registers (N = 2,3,4)
3188 defm LD1x2 : LDVList_BHSD<0b1010, "VPair", "ld1">;
3189 def LD1x2_1D : NeonI_LDVList<0, 0b1010, 0b11, VPair1D_operand, "ld1">;
3191 defm LD1x3 : LDVList_BHSD<0b0110, "VTriple", "ld1">;
3192 def LD1x3_1D : NeonI_LDVList<0, 0b0110, 0b11, VTriple1D_operand, "ld1">;
3194 defm LD1x4 : LDVList_BHSD<0b0010, "VQuad", "ld1">;
3195 def LD1x4_1D : NeonI_LDVList<0, 0b0010, 0b11, VQuad1D_operand, "ld1">;
3197 class NeonI_STVList<bit q, bits<4> opcode, bits<2> size,
3198 RegisterOperand VecList, string asmop>
3199 : NeonI_LdStMult<q, 0, opcode, size,
3200 (outs), (ins GPR64xsp:$Rn, VecList:$Rt),
3201 asmop # "\t$Rt, [$Rn]",
3205 let neverHasSideEffects = 1;
3208 multiclass STVList_BHSD<bits<4> opcode, string List, string asmop> {
3209 def _8B : NeonI_STVList<0, opcode, 0b00,
3210 !cast<RegisterOperand>(List # "8B_operand"), asmop>;
3212 def _4H : NeonI_STVList<0, opcode, 0b01,
3213 !cast<RegisterOperand>(List # "4H_operand"), asmop>;
3215 def _2S : NeonI_STVList<0, opcode, 0b10,
3216 !cast<RegisterOperand>(List # "2S_operand"), asmop>;
3218 def _16B : NeonI_STVList<1, opcode, 0b00,
3219 !cast<RegisterOperand>(List # "16B_operand"), asmop>;
3221 def _8H : NeonI_STVList<1, opcode, 0b01,
3222 !cast<RegisterOperand>(List # "8H_operand"), asmop>;
3224 def _4S : NeonI_STVList<1, opcode, 0b10,
3225 !cast<RegisterOperand>(List # "4S_operand"), asmop>;
3227 def _2D : NeonI_STVList<1, opcode, 0b11,
3228 !cast<RegisterOperand>(List # "2D_operand"), asmop>;
3231 // Store multiple N-element structures from N registers (N = 1,2,3,4)
3232 defm ST1 : STVList_BHSD<0b0111, "VOne", "st1">;
3233 def ST1_1D : NeonI_STVList<0, 0b0111, 0b11, VOne1D_operand, "st1">;
3235 defm ST2 : STVList_BHSD<0b1000, "VPair", "st2">;
3237 defm ST3 : STVList_BHSD<0b0100, "VTriple", "st3">;
3239 defm ST4 : STVList_BHSD<0b0000, "VQuad", "st4">;
3241 // Store multiple 1-element structures from N consecutive registers (N = 2,3,4)
3242 defm ST1x2 : STVList_BHSD<0b1010, "VPair", "st1">;
3243 def ST1x2_1D : NeonI_STVList<0, 0b1010, 0b11, VPair1D_operand, "st1">;
3245 defm ST1x3 : STVList_BHSD<0b0110, "VTriple", "st1">;
3246 def ST1x3_1D : NeonI_STVList<0, 0b0110, 0b11, VTriple1D_operand, "st1">;
3248 defm ST1x4 : STVList_BHSD<0b0010, "VQuad", "st1">;
3249 def ST1x4_1D : NeonI_STVList<0, 0b0010, 0b11, VQuad1D_operand, "st1">;
3251 def : Pat<(v2f64 (load GPR64xsp:$addr)), (LD1_2D GPR64xsp:$addr)>;
3252 def : Pat<(v2i64 (load GPR64xsp:$addr)), (LD1_2D GPR64xsp:$addr)>;
3254 def : Pat<(v4f32 (load GPR64xsp:$addr)), (LD1_4S GPR64xsp:$addr)>;
3255 def : Pat<(v4i32 (load GPR64xsp:$addr)), (LD1_4S GPR64xsp:$addr)>;
3257 def : Pat<(v8i16 (load GPR64xsp:$addr)), (LD1_8H GPR64xsp:$addr)>;
3258 def : Pat<(v16i8 (load GPR64xsp:$addr)), (LD1_16B GPR64xsp:$addr)>;
3260 def : Pat<(v1f64 (load GPR64xsp:$addr)), (LD1_1D GPR64xsp:$addr)>;
3261 def : Pat<(v1i64 (load GPR64xsp:$addr)), (LD1_1D GPR64xsp:$addr)>;
3263 def : Pat<(v2f32 (load GPR64xsp:$addr)), (LD1_2S GPR64xsp:$addr)>;
3264 def : Pat<(v2i32 (load GPR64xsp:$addr)), (LD1_2S GPR64xsp:$addr)>;
3266 def : Pat<(v4i16 (load GPR64xsp:$addr)), (LD1_4H GPR64xsp:$addr)>;
3267 def : Pat<(v8i8 (load GPR64xsp:$addr)), (LD1_8B GPR64xsp:$addr)>;
3269 def : Pat<(store (v2i64 VPR128:$value), GPR64xsp:$addr),
3270 (ST1_2D GPR64xsp:$addr, VPR128:$value)>;
3271 def : Pat<(store (v2f64 VPR128:$value), GPR64xsp:$addr),
3272 (ST1_2D GPR64xsp:$addr, VPR128:$value)>;
3274 def : Pat<(store (v4i32 VPR128:$value), GPR64xsp:$addr),
3275 (ST1_4S GPR64xsp:$addr, VPR128:$value)>;
3276 def : Pat<(store (v4f32 VPR128:$value), GPR64xsp:$addr),
3277 (ST1_4S GPR64xsp:$addr, VPR128:$value)>;
3279 def : Pat<(store (v8i16 VPR128:$value), GPR64xsp:$addr),
3280 (ST1_8H GPR64xsp:$addr, VPR128:$value)>;
3281 def : Pat<(store (v16i8 VPR128:$value), GPR64xsp:$addr),
3282 (ST1_16B GPR64xsp:$addr, VPR128:$value)>;
3284 def : Pat<(store (v1i64 VPR64:$value), GPR64xsp:$addr),
3285 (ST1_1D GPR64xsp:$addr, VPR64:$value)>;
3286 def : Pat<(store (v1f64 VPR64:$value), GPR64xsp:$addr),
3287 (ST1_1D GPR64xsp:$addr, VPR64:$value)>;
3289 def : Pat<(store (v2i32 VPR64:$value), GPR64xsp:$addr),
3290 (ST1_2S GPR64xsp:$addr, VPR64:$value)>;
3291 def : Pat<(store (v2f32 VPR64:$value), GPR64xsp:$addr),
3292 (ST1_2S GPR64xsp:$addr, VPR64:$value)>;
3294 def : Pat<(store (v4i16 VPR64:$value), GPR64xsp:$addr),
3295 (ST1_4H GPR64xsp:$addr, VPR64:$value)>;
3296 def : Pat<(store (v8i8 VPR64:$value), GPR64xsp:$addr),
3297 (ST1_8B GPR64xsp:$addr, VPR64:$value)>;
3299 // Match load/store of v1i8/v1i16/v1i32 type to FPR8/FPR16/FPR32 load/store.
3300 // FIXME: for now we have v1i8, v1i16, v1i32 legal types, if they are illegal,
3301 // these patterns are not needed any more.
3302 def : Pat<(v1i8 (load GPR64xsp:$addr)), (LSFP8_LDR $addr, 0)>;
3303 def : Pat<(v1i16 (load GPR64xsp:$addr)), (LSFP16_LDR $addr, 0)>;
3304 def : Pat<(v1i32 (load GPR64xsp:$addr)), (LSFP32_LDR $addr, 0)>;
3306 def : Pat<(store (v1i8 FPR8:$value), GPR64xsp:$addr),
3307 (LSFP8_STR $value, $addr, 0)>;
3308 def : Pat<(store (v1i16 FPR16:$value), GPR64xsp:$addr),
3309 (LSFP16_STR $value, $addr, 0)>;
3310 def : Pat<(store (v1i32 FPR32:$value), GPR64xsp:$addr),
3311 (LSFP32_STR $value, $addr, 0)>;
3314 // End of vector load/store multiple N-element structure(class SIMD lselem)
3316 // The following are the post-index vector load/store multiple N-element
3317 // structure(class SIMD lselem-post)
3318 def exact1_asmoperand : AsmOperandClass {
3319 let Name = "Exact1";
3320 let PredicateMethod = "isExactImm<1>";
3321 let RenderMethod = "addImmOperands";
3323 def uimm_exact1 : Operand<i32>, ImmLeaf<i32, [{return Imm == 1;}]> {
3324 let ParserMatchClass = exact1_asmoperand;
3327 def exact2_asmoperand : AsmOperandClass {
3328 let Name = "Exact2";
3329 let PredicateMethod = "isExactImm<2>";
3330 let RenderMethod = "addImmOperands";
3332 def uimm_exact2 : Operand<i32>, ImmLeaf<i32, [{return Imm == 2;}]> {
3333 let ParserMatchClass = exact2_asmoperand;
3336 def exact3_asmoperand : AsmOperandClass {
3337 let Name = "Exact3";
3338 let PredicateMethod = "isExactImm<3>";
3339 let RenderMethod = "addImmOperands";
3341 def uimm_exact3 : Operand<i32>, ImmLeaf<i32, [{return Imm == 3;}]> {
3342 let ParserMatchClass = exact3_asmoperand;
3345 def exact4_asmoperand : AsmOperandClass {
3346 let Name = "Exact4";
3347 let PredicateMethod = "isExactImm<4>";
3348 let RenderMethod = "addImmOperands";
3350 def uimm_exact4 : Operand<i32>, ImmLeaf<i32, [{return Imm == 4;}]> {
3351 let ParserMatchClass = exact4_asmoperand;
3354 def exact6_asmoperand : AsmOperandClass {
3355 let Name = "Exact6";
3356 let PredicateMethod = "isExactImm<6>";
3357 let RenderMethod = "addImmOperands";
3359 def uimm_exact6 : Operand<i32>, ImmLeaf<i32, [{return Imm == 6;}]> {
3360 let ParserMatchClass = exact6_asmoperand;
3363 def exact8_asmoperand : AsmOperandClass {
3364 let Name = "Exact8";
3365 let PredicateMethod = "isExactImm<8>";
3366 let RenderMethod = "addImmOperands";
3368 def uimm_exact8 : Operand<i32>, ImmLeaf<i32, [{return Imm == 8;}]> {
3369 let ParserMatchClass = exact8_asmoperand;
3372 def exact12_asmoperand : AsmOperandClass {
3373 let Name = "Exact12";
3374 let PredicateMethod = "isExactImm<12>";
3375 let RenderMethod = "addImmOperands";
3377 def uimm_exact12 : Operand<i32>, ImmLeaf<i32, [{return Imm == 12;}]> {
3378 let ParserMatchClass = exact12_asmoperand;
3381 def exact16_asmoperand : AsmOperandClass {
3382 let Name = "Exact16";
3383 let PredicateMethod = "isExactImm<16>";
3384 let RenderMethod = "addImmOperands";
3386 def uimm_exact16 : Operand<i32>, ImmLeaf<i32, [{return Imm == 16;}]> {
3387 let ParserMatchClass = exact16_asmoperand;
3390 def exact24_asmoperand : AsmOperandClass {
3391 let Name = "Exact24";
3392 let PredicateMethod = "isExactImm<24>";
3393 let RenderMethod = "addImmOperands";
3395 def uimm_exact24 : Operand<i32>, ImmLeaf<i32, [{return Imm == 24;}]> {
3396 let ParserMatchClass = exact24_asmoperand;
3399 def exact32_asmoperand : AsmOperandClass {
3400 let Name = "Exact32";
3401 let PredicateMethod = "isExactImm<32>";
3402 let RenderMethod = "addImmOperands";
3404 def uimm_exact32 : Operand<i32>, ImmLeaf<i32, [{return Imm == 32;}]> {
3405 let ParserMatchClass = exact32_asmoperand;
3408 def exact48_asmoperand : AsmOperandClass {
3409 let Name = "Exact48";
3410 let PredicateMethod = "isExactImm<48>";
3411 let RenderMethod = "addImmOperands";
3413 def uimm_exact48 : Operand<i32>, ImmLeaf<i32, [{return Imm == 48;}]> {
3414 let ParserMatchClass = exact48_asmoperand;
3417 def exact64_asmoperand : AsmOperandClass {
3418 let Name = "Exact64";
3419 let PredicateMethod = "isExactImm<64>";
3420 let RenderMethod = "addImmOperands";
3422 def uimm_exact64 : Operand<i32>, ImmLeaf<i32, [{return Imm == 64;}]> {
3423 let ParserMatchClass = exact64_asmoperand;
3426 multiclass NeonI_LDWB_VList<bit q, bits<4> opcode, bits<2> size,
3427 RegisterOperand VecList, Operand ImmTy,
3429 let Constraints = "$Rn = $wb", mayLoad = 1, neverHasSideEffects = 1,
3430 DecoderMethod = "DecodeVLDSTPostInstruction" in {
3431 def _fixed : NeonI_LdStMult_Post<q, 1, opcode, size,
3432 (outs VecList:$Rt, GPR64xsp:$wb),
3433 (ins GPR64xsp:$Rn, ImmTy:$amt),
3434 asmop # "\t$Rt, [$Rn], $amt",
3440 def _register : NeonI_LdStMult_Post<q, 1, opcode, size,
3441 (outs VecList:$Rt, GPR64xsp:$wb),
3442 (ins GPR64xsp:$Rn, GPR64noxzr:$Rm),
3443 asmop # "\t$Rt, [$Rn], $Rm",
// Instantiate NeonI_LDWB_VList for every arrangement: 64-bit forms
// (q=0: 8B/4H/2S) and 128-bit forms (q=1: 16B/8H/4S/2D). The register
// list operand is looked up by name, e.g. "VPair" # "8B_operand".
3450 multiclass LDWB_VList_BHSD<bits<4> opcode, string List, Operand ImmTy,
3450 Operand ImmTy2, string asmop> {
3451 defm _8B : NeonI_LDWB_VList<0, opcode, 0b00,
3452 !cast<RegisterOperand>(List # "8B_operand"),
3455 defm _4H : NeonI_LDWB_VList<0, opcode, 0b01,
3456 !cast<RegisterOperand>(List # "4H_operand"),
3459 defm _2S : NeonI_LDWB_VList<0, opcode, 0b10,
3460 !cast<RegisterOperand>(List # "2S_operand"),
3463 defm _16B : NeonI_LDWB_VList<1, opcode, 0b00,
3464 !cast<RegisterOperand>(List # "16B_operand"),
3467 defm _8H : NeonI_LDWB_VList<1, opcode, 0b01,
3468 !cast<RegisterOperand>(List # "8H_operand"),
3471 defm _4S : NeonI_LDWB_VList<1, opcode, 0b10,
3472 !cast<RegisterOperand>(List # "4S_operand"),
3475 defm _2D : NeonI_LDWB_VList<1, opcode, 0b11,
3476 !cast<RegisterOperand>(List # "2D_operand"),
3480 // Post-index load multiple N-element structures from N registers (N = 1,2,3,4)
// The two exact-immediate operands are the post-increment amounts for the
// 64-bit and 128-bit forms respectively (N * vector-size bytes).
3481 defm LD1WB : LDWB_VList_BHSD<0b0111, "VOne", uimm_exact8, uimm_exact16, "ld1">;
// The 1D arrangement only exists in the 64-bit form, so it is defined
// directly rather than through LDWB_VList_BHSD.
3482 defm LD1WB_1D : NeonI_LDWB_VList<0, 0b0111, 0b11, VOne1D_operand, uimm_exact8,
3485 defm LD2WB : LDWB_VList_BHSD<0b1000, "VPair", uimm_exact16, uimm_exact32, "ld2">;
3487 defm LD3WB : LDWB_VList_BHSD<0b0100, "VTriple", uimm_exact24, uimm_exact48,
3490 defm LD4WB : LDWB_VList_BHSD<0b0000, "VQuad", uimm_exact32, uimm_exact64, "ld4">;
3492 // Post-index load multiple 1-element structures from N consecutive registers
3494 defm LD1x2WB : LDWB_VList_BHSD<0b1010, "VPair", uimm_exact16, uimm_exact32,
3496 defm LD1x2WB_1D : NeonI_LDWB_VList<0, 0b1010, 0b11, VPair1D_operand,
3497 uimm_exact16, "ld1">;
3499 defm LD1x3WB : LDWB_VList_BHSD<0b0110, "VTriple", uimm_exact24, uimm_exact48,
3501 defm LD1x3WB_1D : NeonI_LDWB_VList<0, 0b0110, 0b11, VTriple1D_operand,
3502 uimm_exact24, "ld1">;
3504 defm LD1x4WB : LDWB_VList_BHSD<0b0010, "VQuad", uimm_exact32, uimm_exact64,
3506 defm LD1x4WB_1D : NeonI_LDWB_VList<0, 0b0010, 0b11, VQuad1D_operand,
3507 uimm_exact32, "ld1">;
// Post-indexed (writeback) vector store of a whole register list —
// store-side mirror of NeonI_LDWB_VList. Only the written-back base is an
// output; the vector list appears in the ins.
3509 multiclass NeonI_STWB_VList<bit q, bits<4> opcode, bits<2> size,
3510 RegisterOperand VecList, Operand ImmTy,
3512 let Constraints = "$Rn = $wb", mayStore = 1, neverHasSideEffects = 1,
3513 DecoderMethod = "DecodeVLDSTPostInstruction" in {
3514 def _fixed : NeonI_LdStMult_Post<q, 0, opcode, size,
3515 (outs GPR64xsp:$wb),
3516 (ins GPR64xsp:$Rn, ImmTy:$amt, VecList:$Rt),
3517 asmop # "\t$Rt, [$Rn], $amt",
3523 def _register : NeonI_LdStMult_Post<q, 0, opcode, size,
3524 (outs GPR64xsp:$wb),
3525 (ins GPR64xsp:$Rn, GPR64noxzr:$Rm, VecList:$Rt),
3526 asmop # "\t$Rt, [$Rn], $Rm",
// Instantiate NeonI_STWB_VList for all arrangements (store-side mirror
// of LDWB_VList_BHSD): q=0 gives 8B/4H/2S, q=1 gives 16B/8H/4S/2D.
3532 multiclass STWB_VList_BHSD<bits<4> opcode, string List, Operand ImmTy,
3533 Operand ImmTy2, string asmop> {
3534 defm _8B : NeonI_STWB_VList<0, opcode, 0b00,
3535 !cast<RegisterOperand>(List # "8B_operand"), ImmTy, asmop>;
3537 defm _4H : NeonI_STWB_VList<0, opcode, 0b01,
3538 !cast<RegisterOperand>(List # "4H_operand"),
3541 defm _2S : NeonI_STWB_VList<0, opcode, 0b10,
3542 !cast<RegisterOperand>(List # "2S_operand"),
3545 defm _16B : NeonI_STWB_VList<1, opcode, 0b00,
3546 !cast<RegisterOperand>(List # "16B_operand"),
3549 defm _8H : NeonI_STWB_VList<1, opcode, 0b01,
3550 !cast<RegisterOperand>(List # "8H_operand"),
3553 defm _4S : NeonI_STWB_VList<1, opcode, 0b10,
3554 !cast<RegisterOperand>(List # "4S_operand"),
3557 defm _2D : NeonI_STWB_VList<1, opcode, 0b11,
3558 !cast<RegisterOperand>(List # "2D_operand"),
3562 // Post-index store multiple N-element structures to N registers (N = 1,2,3,4)
3563 defm ST1WB : STWB_VList_BHSD<0b0111, "VOne", uimm_exact8, uimm_exact16, "st1">;
3564 defm ST1WB_1D : NeonI_STWB_VList<0, 0b0111, 0b11, VOne1D_operand, uimm_exact8,
3567 defm ST2WB : STWB_VList_BHSD<0b1000, "VPair", uimm_exact16, uimm_exact32, "st2">;
3569 defm ST3WB : STWB_VList_BHSD<0b0100, "VTriple", uimm_exact24, uimm_exact48,
3572 defm ST4WB : STWB_VList_BHSD<0b0000, "VQuad", uimm_exact32, uimm_exact64, "st4">;
3574 // Post-index store multiple 1-element structures from N consecutive registers
3576 defm ST1x2WB : STWB_VList_BHSD<0b1010, "VPair", uimm_exact16, uimm_exact32,
3578 defm ST1x2WB_1D : NeonI_STWB_VList<0, 0b1010, 0b11, VPair1D_operand,
3579 uimm_exact16, "st1">;
3581 defm ST1x3WB : STWB_VList_BHSD<0b0110, "VTriple", uimm_exact24, uimm_exact48,
3583 defm ST1x3WB_1D : NeonI_STWB_VList<0, 0b0110, 0b11, VTriple1D_operand,
3584 uimm_exact24, "st1">;
3586 defm ST1x4WB : STWB_VList_BHSD<0b0010, "VQuad", uimm_exact32, uimm_exact64,
3588 defm ST1x4WB_1D : NeonI_STWB_VList<0, 0b0010, 0b11, VQuad1D_operand,
3589 uimm_exact32, "st1">;
3591 // End of post-index vector load/store multiple N-element structure
3592 // (class SIMD lselem-post)
3594 // The following are vector load/store single N-element structure
3595 // (class SIMD lsone).
// Bare lane-index operands: printed without a '#' prefix
// (printUImmBareOperand) and limited to the legal lane range for each
// element size (d: only 0, s: <2, h: <4 ... per the ImmLeaf bounds below).
3596 def neon_uimm0_bare : Operand<i64>,
3597 ImmLeaf<i64, [{return Imm == 0;}]> {
3598 let ParserMatchClass = neon_uimm0_asmoperand;
3599 let PrintMethod = "printUImmBareOperand";
3602 def neon_uimm1_bare : Operand<i64>,
3603 ImmLeaf<i64, [{return Imm < 2;}]> {
3604 let ParserMatchClass = neon_uimm1_asmoperand;
3605 let PrintMethod = "printUImmBareOperand";
3608 def neon_uimm2_bare : Operand<i64>,
3609 ImmLeaf<i64, [{return Imm < 4;}]> {
3610 let ParserMatchClass = neon_uimm2_asmoperand;
3611 let PrintMethod = "printUImmBareOperand";
3614 def neon_uimm3_bare : Operand<i64>,
3615 ImmLeaf<i64, [{return Imm < 8;}]> {
3616 let ParserMatchClass = uimm3_asmoperand;
3617 let PrintMethod = "printUImmBareOperand";
3620 def neon_uimm4_bare : Operand<i64>,
3621 ImmLeaf<i64, [{return Imm < 16;}]> {
3622 let ParserMatchClass = uimm4_asmoperand;
3623 let PrintMethod = "printUImmBareOperand";
// Load-and-replicate (LDnR): load single structure and duplicate it to
// all lanes of the registers in VecList. No writeback in this form.
3626 class NeonI_LDN_Dup<bit q, bit r, bits<3> opcode, bits<2> size,
3627 RegisterOperand VecList, string asmop>
3628 : NeonI_LdOne_Dup<q, r, opcode, size,
3629 (outs VecList:$Rt), (ins GPR64xsp:$Rn),
3630 asmop # "\t$Rt, [$Rn]",
3634 let neverHasSideEffects = 1;
// Instantiate NeonI_LDN_Dup for every arrangement, including 1D
// (dup forms exist for 1D, unlike the whole-list loads above).
3637 multiclass LDN_Dup_BHSD<bit r, bits<3> opcode, string List, string asmop> {
3638 def _8B : NeonI_LDN_Dup<0, r, opcode, 0b00,
3639 !cast<RegisterOperand>(List # "8B_operand"), asmop>;
3641 def _4H : NeonI_LDN_Dup<0, r, opcode, 0b01,
3642 !cast<RegisterOperand>(List # "4H_operand"), asmop>;
3644 def _2S : NeonI_LDN_Dup<0, r, opcode, 0b10,
3645 !cast<RegisterOperand>(List # "2S_operand"), asmop>;
3647 def _1D : NeonI_LDN_Dup<0, r, opcode, 0b11,
3648 !cast<RegisterOperand>(List # "1D_operand"), asmop>;
3650 def _16B : NeonI_LDN_Dup<1, r, opcode, 0b00,
3651 !cast<RegisterOperand>(List # "16B_operand"), asmop>;
3653 def _8H : NeonI_LDN_Dup<1, r, opcode, 0b01,
3654 !cast<RegisterOperand>(List # "8H_operand"), asmop>;
3656 def _4S : NeonI_LDN_Dup<1, r, opcode, 0b10,
3657 !cast<RegisterOperand>(List # "4S_operand"), asmop>;
3659 def _2D : NeonI_LDN_Dup<1, r, opcode, 0b11,
3660 !cast<RegisterOperand>(List # "2D_operand"), asmop>;
3663 // Load single 1-element structure to all lanes of 1 register
3664 defm LD1R : LDN_Dup_BHSD<0b0, 0b110, "VOne", "ld1r">;
3666 // Load single N-element structure to all lanes of N consecutive
3667 // registers (N = 2,3,4)
// N is encoded via the r bit and opcode: (r,opcode) selects ld1r/ld2r
// (0b110) vs ld3r/ld4r (0b111).
3668 defm LD2R : LDN_Dup_BHSD<0b1, 0b110, "VPair", "ld2r">;
3669 defm LD3R : LDN_Dup_BHSD<0b0, 0b111, "VTriple", "ld3r">;
3670 defm LD4R : LDN_Dup_BHSD<0b1, 0b111, "VQuad", "ld4r">;
// Select LD1R for a scalar load followed by a vector duplicate:
// (Neon_vdup (load addr)) ==> LD1R addr. The i8/i16 element forms use
// extending loads since there is no sub-i32 scalar load node.
3673 class LD1R_pattern <ValueType VTy, ValueType DTy, PatFrag LoadOp,
3675 : Pat<(VTy (Neon_vdup (DTy (LoadOp GPR64xsp:$Rn)))),
3676 (VTy (INST GPR64xsp:$Rn))>;
3678 // Match all LD1R instructions
3679 def : LD1R_pattern<v8i8, i32, extloadi8, LD1R_8B>;
3681 def : LD1R_pattern<v16i8, i32, extloadi8, LD1R_16B>;
3683 def : LD1R_pattern<v4i16, i32, extloadi16, LD1R_4H>;
3685 def : LD1R_pattern<v8i16, i32, extloadi16, LD1R_8H>;
3687 def : LD1R_pattern<v2i32, i32, load, LD1R_2S>;
3688 def : LD1R_pattern<v2f32, f32, load, LD1R_2S>;
3690 def : LD1R_pattern<v4i32, i32, load, LD1R_4S>;
3691 def : LD1R_pattern<v4f32, f32, load, LD1R_4S>;
3693 def : LD1R_pattern<v1i64, i64, load, LD1R_1D>;
3694 def : LD1R_pattern<v1f64, f64, load, LD1R_1D>;
3696 def : LD1R_pattern<v2i64, i64, load, LD1R_2D>;
3697 def : LD1R_pattern<v2f64, f64, load, LD1R_2D>;
// Vector-list operands printed with a bare element size suffix
// (".b"/".h"/".s"/".d", no lane count) — the syntax used by the
// single-lane load/store forms below.
3700 multiclass VectorList_Bare_BHSD<string PREFIX, int Count,
3701 RegisterClass RegList> {
3702 defm B : VectorList_operands<PREFIX, "B", Count, RegList>;
3703 defm H : VectorList_operands<PREFIX, "H", Count, RegList>;
3704 defm S : VectorList_operands<PREFIX, "S", Count, RegList>;
3705 defm D : VectorList_operands<PREFIX, "D", Count, RegList>;
3708 // Special vector list operand of 128-bit vectors with bare layout.
3709 // i.e. only show ".b", ".h", ".s", ".d"
3710 defm VOne : VectorList_Bare_BHSD<"VOne", 1, FPR128>;
3711 defm VPair : VectorList_Bare_BHSD<"VPair", 2, QPair>;
3712 defm VTriple : VectorList_Bare_BHSD<"VTriple", 3, QTriple>;
3713 defm VQuad : VectorList_Bare_BHSD<"VQuad", 4, QQuad>;
// Load single structure into one lane of the list registers. The lanes
// not written must keep their old contents, hence the $src input tied to
// $Rt and hasExtraDefRegAllocReq.
3715 class NeonI_LDN_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
3716 Operand ImmOp, string asmop>
3717 : NeonI_LdStOne_Lane<1, r, op2_1, op0,
3719 (ins GPR64xsp:$Rn, VList:$src, ImmOp:$lane),
3720 asmop # "\t$Rt[$lane], [$Rn]",
3724 let neverHasSideEffects = 1;
3725 let hasExtraDefRegAllocReq = 1;
3726 let Constraints = "$src = $Rt";
// Per-element-size lane-load instantiations. The lane index is scattered
// across the encoding: the high lane bit goes in Inst{30} (the Q bit)
// and the remaining bits into Inst{12-10}, padded with zeros for wider
// elements.
3729 multiclass LDN_Lane_BHSD<bit r, bit op0, string List, string asmop> {
3730 def _B : NeonI_LDN_Lane<r, 0b00, op0,
3731 !cast<RegisterOperand>(List # "B_operand"),
3732 neon_uimm4_bare, asmop> {
3733 let Inst{12-10} = lane{2-0};
3734 let Inst{30} = lane{3};
3737 def _H : NeonI_LDN_Lane<r, 0b01, op0,
3738 !cast<RegisterOperand>(List # "H_operand"),
3739 neon_uimm3_bare, asmop> {
3740 let Inst{12-10} = {lane{1}, lane{0}, 0b0};
3741 let Inst{30} = lane{2};
3744 def _S : NeonI_LDN_Lane<r, 0b10, op0,
3745 !cast<RegisterOperand>(List # "S_operand"),
3746 neon_uimm2_bare, asmop> {
3747 let Inst{12-10} = {lane{0}, 0b0, 0b0};
3748 let Inst{30} = lane{1};
3751 def _D : NeonI_LDN_Lane<r, 0b10, op0,
3752 !cast<RegisterOperand>(List # "D_operand"),
3753 neon_uimm1_bare, asmop> {
3754 let Inst{12-10} = 0b001;
3755 let Inst{30} = lane{0};
3759 // Load single 1-element structure to one lane of 1 register.
3760 defm LD1LN : LDN_Lane_BHSD<0b0, 0b0, "VOne", "ld1">;
3762 // Load single N-element structure to one lane of N consecutive registers
3764 defm LD2LN : LDN_Lane_BHSD<0b1, 0b0, "VPair", "ld2">;
3765 defm LD3LN : LDN_Lane_BHSD<0b0, 0b1, "VTriple", "ld3">;
3766 defm LD4LN : LDN_Lane_BHSD<0b1, 0b1, "VQuad", "ld4">;
// Select LD1LN for (vector_insert vec, (load addr), lane). Two patterns
// per element type: the 64-bit vector form widens $src to 128 bits with
// SUBREG_TO_REG, runs the 128-bit instruction, then extracts the D half;
// the 128-bit form maps directly onto INST.
3768 multiclass LD1LN_patterns<ValueType VTy, ValueType VTy2, ValueType DTy,
3769 Operand ImmOp, Operand ImmOp2, PatFrag LoadOp,
3771 def : Pat<(VTy (vector_insert (VTy VPR64:$src),
3772 (DTy (LoadOp GPR64xsp:$Rn)), (ImmOp:$lane))),
3773 (VTy (EXTRACT_SUBREG
3775 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
3779 def : Pat<(VTy2 (vector_insert (VTy2 VPR128:$src),
3780 (DTy (LoadOp GPR64xsp:$Rn)), (ImmOp2:$lane))),
3781 (VTy2 (INST GPR64xsp:$Rn, VPR128:$src, ImmOp2:$lane))>;
3784 // Match all LD1LN instructions
3785 defm : LD1LN_patterns<v8i8, v16i8, i32, neon_uimm3_bare, neon_uimm4_bare,
3786 extloadi8, LD1LN_B>;
3788 defm : LD1LN_patterns<v4i16, v8i16, i32, neon_uimm2_bare, neon_uimm3_bare,
3789 extloadi16, LD1LN_H>;
3791 defm : LD1LN_patterns<v2i32, v4i32, i32, neon_uimm1_bare, neon_uimm2_bare,
3793 defm : LD1LN_patterns<v2f32, v4f32, f32, neon_uimm1_bare, neon_uimm2_bare,
3796 defm : LD1LN_patterns<v1i64, v2i64, i64, neon_uimm0_bare, neon_uimm1_bare,
3798 defm : LD1LN_patterns<v1f64, v2f64, f64, neon_uimm0_bare, neon_uimm1_bare,
// Store one lane of the list registers to memory — store-side mirror of
// NeonI_LDN_Lane; no tied operand is needed since nothing is written to
// the vector registers.
3801 class NeonI_STN_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
3802 Operand ImmOp, string asmop>
3803 : NeonI_LdStOne_Lane<0, r, op2_1, op0,
3804 (outs), (ins GPR64xsp:$Rn, VList:$Rt, ImmOp:$lane),
3805 asmop # "\t$Rt[$lane], [$Rn]",
3809 let neverHasSideEffects = 1;
3810 let hasExtraDefRegAllocReq = 1;
// Per-element-size lane-store instantiations; lane-index encoding is
// identical to LDN_Lane_BHSD (high lane bit in Inst{30}, rest in
// Inst{12-10}).
3813 multiclass STN_Lane_BHSD<bit r, bit op0, string List, string asmop> {
3814 def _B : NeonI_STN_Lane<r, 0b00, op0,
3815 !cast<RegisterOperand>(List # "B_operand"),
3816 neon_uimm4_bare, asmop> {
3817 let Inst{12-10} = lane{2-0};
3818 let Inst{30} = lane{3};
3821 def _H : NeonI_STN_Lane<r, 0b01, op0,
3822 !cast<RegisterOperand>(List # "H_operand"),
3823 neon_uimm3_bare, asmop> {
3824 let Inst{12-10} = {lane{1}, lane{0}, 0b0};
3825 let Inst{30} = lane{2};
3828 def _S : NeonI_STN_Lane<r, 0b10, op0,
3829 !cast<RegisterOperand>(List # "S_operand"),
3830 neon_uimm2_bare, asmop> {
3831 let Inst{12-10} = {lane{0}, 0b0, 0b0};
3832 let Inst{30} = lane{1};
3835 def _D : NeonI_STN_Lane<r, 0b10, op0,
3836 !cast<RegisterOperand>(List # "D_operand"),
3837 neon_uimm1_bare, asmop>{
3838 let Inst{12-10} = 0b001;
3839 let Inst{30} = lane{0};
3843 // Store single 1-element structure from one lane of 1 register.
3844 defm ST1LN : STN_Lane_BHSD<0b0, 0b0, "VOne", "st1">;
3846 // Store single N-element structure from one lane of N consecutive registers
3848 defm ST2LN : STN_Lane_BHSD<0b1, 0b0, "VPair", "st2">;
3849 defm ST3LN : STN_Lane_BHSD<0b0, 0b1, "VTriple", "st3">;
3850 defm ST4LN : STN_Lane_BHSD<0b1, 0b1, "VQuad", "st4">;
// Select ST1LN for (store (vector_extract vec, lane), addr). As with the
// load patterns, the 64-bit vector form is first widened with
// SUBREG_TO_REG so the 128-bit instruction can be used.
3852 multiclass ST1LN_patterns<ValueType VTy, ValueType VTy2, ValueType DTy,
3853 Operand ImmOp, Operand ImmOp2, PatFrag StoreOp,
3855 def : Pat<(StoreOp (DTy (vector_extract (VTy VPR64:$Rt), ImmOp:$lane)),
3858 (SUBREG_TO_REG (i64 0), VPR64:$Rt, sub_64),
3861 def : Pat<(StoreOp (DTy (vector_extract (VTy2 VPR128:$Rt), ImmOp2:$lane)),
3863 (INST GPR64xsp:$Rn, VPR128:$Rt, ImmOp2:$lane)>;
3866 // Match all ST1LN instructions
3867 defm : ST1LN_patterns<v8i8, v16i8, i32, neon_uimm3_bare, neon_uimm4_bare,
3868 truncstorei8, ST1LN_B>;
3870 defm : ST1LN_patterns<v4i16, v8i16, i32, neon_uimm2_bare, neon_uimm3_bare,
3871 truncstorei16, ST1LN_H>;
3873 defm : ST1LN_patterns<v2i32, v4i32, i32, neon_uimm1_bare, neon_uimm2_bare,
3875 defm : ST1LN_patterns<v2f32, v4f32, f32, neon_uimm1_bare, neon_uimm2_bare,
3878 defm : ST1LN_patterns<v1i64, v2i64, i64, neon_uimm0_bare, neon_uimm1_bare,
3880 defm : ST1LN_patterns<v1f64, v2f64, f64, neon_uimm0_bare, neon_uimm1_bare,
3883 // End of vector load/store single N-element structure (class SIMD lsone).
3886 // The following are post-index load/store single N-element instructions
3887 // (class SIMD lsone-post)
// Post-indexed (writeback) variant of the load-and-replicate (LDnR)
// instructions; _fixed uses an exact immediate increment, _register a GPR.
3889 multiclass NeonI_LDN_WB_Dup<bit q, bit r, bits<3> opcode, bits<2> size,
3890 RegisterOperand VecList, Operand ImmTy,
3892 let mayLoad = 1, neverHasSideEffects = 1, Constraints = "$wb = $Rn",
3893 DecoderMethod = "DecodeVLDSTLanePostInstruction" in {
3894 def _fixed : NeonI_LdOne_Dup_Post<q, r, opcode, size,
3895 (outs VecList:$Rt, GPR64xsp:$wb),
3896 (ins GPR64xsp:$Rn, ImmTy:$amt),
3897 asmop # "\t$Rt, [$Rn], $amt",
3903 def _register : NeonI_LdOne_Dup_Post<q, r, opcode, size,
3904 (outs VecList:$Rt, GPR64xsp:$wb),
3905 (ins GPR64xsp:$Rn, GPR64noxzr:$Rm),
3906 asmop # "\t$Rt, [$Rn], $Rm",
// One exact post-increment operand per element size (b/h/s/d): the
// increment is N structures * element size bytes.
3912 multiclass LDWB_Dup_BHSD<bit r, bits<3> opcode, string List, string asmop,
3913 Operand uimm_b, Operand uimm_h,
3914 Operand uimm_s, Operand uimm_d> {
3915 defm _8B : NeonI_LDN_WB_Dup<0, r, opcode, 0b00,
3916 !cast<RegisterOperand>(List # "8B_operand"),
3919 defm _4H : NeonI_LDN_WB_Dup<0, r, opcode, 0b01,
3920 !cast<RegisterOperand>(List # "4H_operand"),
3923 defm _2S : NeonI_LDN_WB_Dup<0, r, opcode, 0b10,
3924 !cast<RegisterOperand>(List # "2S_operand"),
3927 defm _1D : NeonI_LDN_WB_Dup<0, r, opcode, 0b11,
3928 !cast<RegisterOperand>(List # "1D_operand"),
3931 defm _16B : NeonI_LDN_WB_Dup<1, r, opcode, 0b00,
3932 !cast<RegisterOperand>(List # "16B_operand"),
3935 defm _8H : NeonI_LDN_WB_Dup<1, r, opcode, 0b01,
3936 !cast<RegisterOperand>(List # "8H_operand"),
3939 defm _4S : NeonI_LDN_WB_Dup<1, r, opcode, 0b10,
3940 !cast<RegisterOperand>(List # "4S_operand"),
3943 defm _2D : NeonI_LDN_WB_Dup<1, r, opcode, 0b11,
3944 !cast<RegisterOperand>(List # "2D_operand"),
3948 // Post-index load single 1-element structure to all lanes of 1 register
3949 defm LD1R_WB : LDWB_Dup_BHSD<0b0, 0b110, "VOne", "ld1r", uimm_exact1,
3950 uimm_exact2, uimm_exact4, uimm_exact8>;
3952 // Post-index load single N-element structure to all lanes of N consecutive
3953 // registers (N = 2,3,4)
3954 defm LD2R_WB : LDWB_Dup_BHSD<0b1, 0b110, "VPair", "ld2r", uimm_exact2,
3955 uimm_exact4, uimm_exact8, uimm_exact16>;
3956 defm LD3R_WB : LDWB_Dup_BHSD<0b0, 0b111, "VTriple", "ld3r", uimm_exact3,
3957 uimm_exact6, uimm_exact12, uimm_exact24>;
3958 defm LD4R_WB : LDWB_Dup_BHSD<0b1, 0b111, "VQuad", "ld4r", uimm_exact4,
3959 uimm_exact8, uimm_exact16, uimm_exact32>;
// Post-indexed single-lane loads. As with LDN_Lane, $Rt is tied to $src
// so unwritten lanes are preserved; $wb is the written-back base.
3961 let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1,
3962 Constraints = "$Rn = $wb, $Rt = $src",
3963 DecoderMethod = "DecodeVLDSTLanePostInstruction" in {
// Immediate post-increment form.
3964 class LDN_WBFx_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
3965 Operand ImmTy, Operand ImmOp, string asmop>
3966 : NeonI_LdStOne_Lane_Post<1, r, op2_1, op0,
3967 (outs VList:$Rt, GPR64xsp:$wb),
3968 (ins GPR64xsp:$Rn, ImmTy:$amt,
3969 VList:$src, ImmOp:$lane),
3970 asmop # "\t$Rt[$lane], [$Rn], $amt",
// Register post-increment form.
3976 class LDN_WBReg_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
3977 Operand ImmTy, Operand ImmOp, string asmop>
3978 : NeonI_LdStOne_Lane_Post<1, r, op2_1, op0,
3979 (outs VList:$Rt, GPR64xsp:$wb),
3980 (ins GPR64xsp:$Rn, GPR64noxzr:$Rm,
3981 VList:$src, ImmOp:$lane),
3982 asmop # "\t$Rt[$lane], [$Rn], $Rm",
// Per-element-size instantiations (fixed + register increments). Lane
// index encoding matches LDN_Lane_BHSD: high bit in Inst{30}, the rest
// in Inst{12-10}.
3987 multiclass LD_Lane_WB_BHSD<bit r, bit op0, string List, string asmop,
3988 Operand uimm_b, Operand uimm_h,
3989 Operand uimm_s, Operand uimm_d> {
3990 def _B_fixed : LDN_WBFx_Lane<r, 0b00, op0,
3991 !cast<RegisterOperand>(List # "B_operand"),
3992 uimm_b, neon_uimm4_bare, asmop> {
3993 let Inst{12-10} = lane{2-0};
3994 let Inst{30} = lane{3};
3997 def _B_register : LDN_WBReg_Lane<r, 0b00, op0,
3998 !cast<RegisterOperand>(List # "B_operand"),
3999 uimm_b, neon_uimm4_bare, asmop> {
4000 let Inst{12-10} = lane{2-0};
4001 let Inst{30} = lane{3};
4004 def _H_fixed : LDN_WBFx_Lane<r, 0b01, op0,
4005 !cast<RegisterOperand>(List # "H_operand"),
4006 uimm_h, neon_uimm3_bare, asmop> {
4007 let Inst{12-10} = {lane{1}, lane{0}, 0b0};
4008 let Inst{30} = lane{2};
4011 def _H_register : LDN_WBReg_Lane<r, 0b01, op0,
4012 !cast<RegisterOperand>(List # "H_operand"),
4013 uimm_h, neon_uimm3_bare, asmop> {
4014 let Inst{12-10} = {lane{1}, lane{0}, 0b0};
4015 let Inst{30} = lane{2};
4018 def _S_fixed : LDN_WBFx_Lane<r, 0b10, op0,
4019 !cast<RegisterOperand>(List # "S_operand"),
4020 uimm_s, neon_uimm2_bare, asmop> {
4021 let Inst{12-10} = {lane{0}, 0b0, 0b0};
4022 let Inst{30} = lane{1};
4025 def _S_register : LDN_WBReg_Lane<r, 0b10, op0,
4026 !cast<RegisterOperand>(List # "S_operand"),
4027 uimm_s, neon_uimm2_bare, asmop> {
4028 let Inst{12-10} = {lane{0}, 0b0, 0b0};
4029 let Inst{30} = lane{1};
4032 def _D_fixed : LDN_WBFx_Lane<r, 0b10, op0,
4033 !cast<RegisterOperand>(List # "D_operand"),
4034 uimm_d, neon_uimm1_bare, asmop> {
4035 let Inst{12-10} = 0b001;
4036 let Inst{30} = lane{0};
4039 def _D_register : LDN_WBReg_Lane<r, 0b10, op0,
4040 !cast<RegisterOperand>(List # "D_operand"),
4041 uimm_d, neon_uimm1_bare, asmop> {
4042 let Inst{12-10} = 0b001;
4043 let Inst{30} = lane{0};
4047 // Post-index load single 1-element structure to one lane of 1 register.
4048 defm LD1LN_WB : LD_Lane_WB_BHSD<0b0, 0b0, "VOne", "ld1", uimm_exact1,
4049 uimm_exact2, uimm_exact4, uimm_exact8>;
4051 // Post-index load single N-element structure to one lane of N consecutive
4054 defm LD2LN_WB : LD_Lane_WB_BHSD<0b1, 0b0, "VPair", "ld2", uimm_exact2,
4055 uimm_exact4, uimm_exact8, uimm_exact16>;
4056 defm LD3LN_WB : LD_Lane_WB_BHSD<0b0, 0b1, "VTriple", "ld3", uimm_exact3,
4057 uimm_exact6, uimm_exact12, uimm_exact24>;
4058 defm LD4LN_WB : LD_Lane_WB_BHSD<0b1, 0b1, "VQuad", "ld4", uimm_exact4,
4059 uimm_exact8, uimm_exact16, uimm_exact32>;
// Post-indexed single-lane stores — store-side mirror of the lane-load
// classes above; only the written-back base is an output.
4061 let mayStore = 1, neverHasSideEffects = 1,
4062 hasExtraDefRegAllocReq = 1, Constraints = "$Rn = $wb",
4063 DecoderMethod = "DecodeVLDSTLanePostInstruction" in {
// Immediate post-increment form.
4064 class STN_WBFx_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
4065 Operand ImmTy, Operand ImmOp, string asmop>
4066 : NeonI_LdStOne_Lane_Post<0, r, op2_1, op0,
4067 (outs GPR64xsp:$wb),
4068 (ins GPR64xsp:$Rn, ImmTy:$amt,
4069 VList:$Rt, ImmOp:$lane),
4070 asmop # "\t$Rt[$lane], [$Rn], $amt",
// Register post-increment form.
4076 class STN_WBReg_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
4077 Operand ImmTy, Operand ImmOp, string asmop>
4078 : NeonI_LdStOne_Lane_Post<0, r, op2_1, op0,
4079 (outs GPR64xsp:$wb),
4080 (ins GPR64xsp:$Rn, GPR64noxzr:$Rm, VList:$Rt,
4082 asmop # "\t$Rt[$lane], [$Rn], $Rm",
// Per-element-size instantiations; lane encoding identical to the load
// side (Inst{30} + Inst{12-10}).
4087 multiclass ST_Lane_WB_BHSD<bit r, bit op0, string List, string asmop,
4088 Operand uimm_b, Operand uimm_h,
4089 Operand uimm_s, Operand uimm_d> {
4090 def _B_fixed : STN_WBFx_Lane<r, 0b00, op0,
4091 !cast<RegisterOperand>(List # "B_operand"),
4092 uimm_b, neon_uimm4_bare, asmop> {
4093 let Inst{12-10} = lane{2-0};
4094 let Inst{30} = lane{3};
4097 def _B_register : STN_WBReg_Lane<r, 0b00, op0,
4098 !cast<RegisterOperand>(List # "B_operand"),
4099 uimm_b, neon_uimm4_bare, asmop> {
4100 let Inst{12-10} = lane{2-0};
4101 let Inst{30} = lane{3};
4104 def _H_fixed : STN_WBFx_Lane<r, 0b01, op0,
4105 !cast<RegisterOperand>(List # "H_operand"),
4106 uimm_h, neon_uimm3_bare, asmop> {
4107 let Inst{12-10} = {lane{1}, lane{0}, 0b0};
4108 let Inst{30} = lane{2};
4111 def _H_register : STN_WBReg_Lane<r, 0b01, op0,
4112 !cast<RegisterOperand>(List # "H_operand"),
4113 uimm_h, neon_uimm3_bare, asmop> {
4114 let Inst{12-10} = {lane{1}, lane{0}, 0b0};
4115 let Inst{30} = lane{2};
4118 def _S_fixed : STN_WBFx_Lane<r, 0b10, op0,
4119 !cast<RegisterOperand>(List # "S_operand"),
4120 uimm_s, neon_uimm2_bare, asmop> {
4121 let Inst{12-10} = {lane{0}, 0b0, 0b0};
4122 let Inst{30} = lane{1};
4125 def _S_register : STN_WBReg_Lane<r, 0b10, op0,
4126 !cast<RegisterOperand>(List # "S_operand"),
4127 uimm_s, neon_uimm2_bare, asmop> {
4128 let Inst{12-10} = {lane{0}, 0b0, 0b0};
4129 let Inst{30} = lane{1};
4132 def _D_fixed : STN_WBFx_Lane<r, 0b10, op0,
4133 !cast<RegisterOperand>(List # "D_operand"),
4134 uimm_d, neon_uimm1_bare, asmop> {
4135 let Inst{12-10} = 0b001;
4136 let Inst{30} = lane{0};
4139 def _D_register : STN_WBReg_Lane<r, 0b10, op0,
4140 !cast<RegisterOperand>(List # "D_operand"),
4141 uimm_d, neon_uimm1_bare, asmop> {
4142 let Inst{12-10} = 0b001;
4143 let Inst{30} = lane{0};
4147 // Post-index store single 1-element structure from one lane of 1 register.
4148 defm ST1LN_WB : ST_Lane_WB_BHSD<0b0, 0b0, "VOne", "st1", uimm_exact1,
4149 uimm_exact2, uimm_exact4, uimm_exact8>;
4151 // Post-index store single N-element structure from one lane of N consecutive
4152 // registers (N = 2,3,4)
4153 defm ST2LN_WB : ST_Lane_WB_BHSD<0b1, 0b0, "VPair", "st2", uimm_exact2,
4154 uimm_exact4, uimm_exact8, uimm_exact16>;
4155 defm ST3LN_WB : ST_Lane_WB_BHSD<0b0, 0b1, "VTriple", "st3", uimm_exact3,
4156 uimm_exact6, uimm_exact12, uimm_exact24>;
4157 defm ST4LN_WB : ST_Lane_WB_BHSD<0b1, 0b1, "VQuad", "st4", uimm_exact4,
4158 uimm_exact8, uimm_exact16, uimm_exact32>;
4160 // End of post-index load/store single N-element instructions
4161 // (class SIMD lsone-post)
4163 // Neon Scalar instructions implementation
4164 // Scalar Three Same
// Scalar three-same: two FP/SIMD scalar sources, one result, all the
// same register class. The _HS/_SD/_BHSD multiclasses pick which element
// sizes a given opcode supports; Commutable gates isCommutable.
4166 class NeonI_Scalar3Same_size<bit u, bits<2> size, bits<5> opcode, string asmop,
4168 : NeonI_Scalar3Same<u, size, opcode,
4169 (outs FPRC:$Rd), (ins FPRC:$Rn, FPRC:$Rm),
4170 !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
4174 class NeonI_Scalar3Same_D_size<bit u, bits<5> opcode, string asmop>
4175 : NeonI_Scalar3Same_size<u, 0b11, opcode, asmop, FPR64>;
4177 multiclass NeonI_Scalar3Same_HS_sizes<bit u, bits<5> opcode, string asmop,
4178 bit Commutable = 0> {
4179 let isCommutable = Commutable in {
4180 def hhh : NeonI_Scalar3Same_size<u, 0b01, opcode, asmop, FPR16>;
4181 def sss : NeonI_Scalar3Same_size<u, 0b10, opcode, asmop, FPR32>;
4185 multiclass NeonI_Scalar3Same_SD_sizes<bit u, bit size_high, bits<5> opcode,
4186 string asmop, bit Commutable = 0> {
4187 let isCommutable = Commutable in {
4188 def sss : NeonI_Scalar3Same_size<u, {size_high, 0b0}, opcode, asmop, FPR32>;
4189 def ddd : NeonI_Scalar3Same_size<u, {size_high, 0b1}, opcode, asmop, FPR64>;
4193 multiclass NeonI_Scalar3Same_BHSD_sizes<bit u, bits<5> opcode,
4194 string asmop, bit Commutable = 0> {
4195 let isCommutable = Commutable in {
4196 def bbb : NeonI_Scalar3Same_size<u, 0b00, opcode, asmop, FPR8>;
4197 def hhh : NeonI_Scalar3Same_size<u, 0b01, opcode, asmop, FPR16>;
4198 def sss : NeonI_Scalar3Same_size<u, 0b10, opcode, asmop, FPR32>;
4199 def ddd : NeonI_Scalar3Same_size<u, 0b11, opcode, asmop, FPR64>;
// Selection patterns mapping v1iN/f-typed three-same DAG nodes onto the
// corresponding scalar instructions; the BHSD variant inherits the D
// pattern and adds b/h/s.
4203 multiclass Neon_Scalar3Same_D_size_patterns<SDPatternOperator opnode,
4204 Instruction INSTD> {
4205 def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
4206 (INSTD FPR64:$Rn, FPR64:$Rm)>;
4209 multiclass Neon_Scalar3Same_BHSD_size_patterns<SDPatternOperator opnode,
4214 : Neon_Scalar3Same_D_size_patterns<opnode, INSTD> {
4215 def: Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))),
4216 (INSTB FPR8:$Rn, FPR8:$Rm)>;
4217 def: Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
4218 (INSTH FPR16:$Rn, FPR16:$Rm)>;
4219 def: Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
4220 (INSTS FPR32:$Rn, FPR32:$Rm)>;
4223 multiclass Neon_Scalar3Same_HS_size_patterns<SDPatternOperator opnode,
4225 Instruction INSTS> {
4226 def : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
4227 (INSTH FPR16:$Rn, FPR16:$Rm)>;
4228 def : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
4229 (INSTS FPR32:$Rn, FPR32:$Rm)>;
4232 multiclass Neon_Scalar3Same_SD_size_patterns<SDPatternOperator opnode,
4233 ValueType SResTy, ValueType STy,
4234 Instruction INSTS, ValueType DResTy,
4235 ValueType DTy, Instruction INSTD> {
4236 def : Pat<(SResTy (opnode (STy FPR32:$Rn), (STy FPR32:$Rm))),
4237 (INSTS FPR32:$Rn, FPR32:$Rm)>;
4238 def : Pat<(DResTy (opnode (DTy FPR64:$Rn), (DTy FPR64:$Rm))),
4239 (INSTD FPR64:$Rn, FPR64:$Rm)>;
// Map a v1f64 Neon_cmp with condition code CC onto a scalar compare.
4242 class Neon_Scalar3Same_cmp_V1_D_size_patterns<CondCode CC,
4244 : Pat<(v1i64 (Neon_cmp (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm), CC)),
4245 (INSTD FPR64:$Rn, FPR64:$Rm)>;
4247 // Scalar Three Different
// Scalar three-different: the destination register class is wider than
// the sources (e.g. s = h op h, d = s op s). The _ml variant is the
// multiply-accumulate shape with the accumulator tied ($Src = $Rd).
4249 class NeonI_Scalar3Diff_size<bit u, bits<2> size, bits<4> opcode, string asmop,
4250 RegisterClass FPRCD, RegisterClass FPRCS>
4251 : NeonI_Scalar3Diff<u, size, opcode,
4252 (outs FPRCD:$Rd), (ins FPRCS:$Rn, FPRCS:$Rm),
4253 !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
4257 multiclass NeonI_Scalar3Diff_HS_size<bit u, bits<4> opcode, string asmop> {
4258 def shh : NeonI_Scalar3Diff_size<u, 0b01, opcode, asmop, FPR32, FPR16>;
4259 def dss : NeonI_Scalar3Diff_size<u, 0b10, opcode, asmop, FPR64, FPR32>;
4262 multiclass NeonI_Scalar3Diff_ml_HS_size<bit u, bits<4> opcode, string asmop> {
4263 let Constraints = "$Src = $Rd" in {
4264 def shh : NeonI_Scalar3Diff<u, 0b01, opcode,
4265 (outs FPR32:$Rd), (ins FPR32:$Src, FPR16:$Rn, FPR16:$Rm),
4266 !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
4269 def dss : NeonI_Scalar3Diff<u, 0b10, opcode,
4270 (outs FPR64:$Rd), (ins FPR64:$Src, FPR32:$Rn, FPR32:$Rm),
4271 !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
// Selection patterns for the widening (and widening-accumulate) forms.
4277 multiclass Neon_Scalar3Diff_HS_size_patterns<SDPatternOperator opnode,
4279 Instruction INSTS> {
4280 def : Pat<(v1i32 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
4281 (INSTH FPR16:$Rn, FPR16:$Rm)>;
4282 def : Pat<(v1i64 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
4283 (INSTS FPR32:$Rn, FPR32:$Rm)>;
4286 multiclass Neon_Scalar3Diff_ml_HS_size_patterns<SDPatternOperator opnode,
4288 Instruction INSTS> {
4289 def : Pat<(v1i32 (opnode (v1i32 FPR32:$Src), (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
4290 (INSTH FPR32:$Src, FPR16:$Rn, FPR16:$Rm)>;
4291 def : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
4292 (INSTS FPR64:$Src, FPR32:$Rn, FPR32:$Rm)>;
4295 // Scalar Two Registers Miscellaneous
// Scalar two-register misc: one source, one destination, possibly of
// different register classes (narrowing/widening conversions included).
4297 class NeonI_Scalar2SameMisc_size<bit u, bits<2> size, bits<5> opcode, string asmop,
4298 RegisterClass FPRCD, RegisterClass FPRCS>
4299 : NeonI_Scalar2SameMisc<u, size, opcode,
4300 (outs FPRCD:$Rd), (ins FPRCS:$Rn),
4301 !strconcat(asmop, "\t$Rd, $Rn"),
4305 multiclass NeonI_Scalar2SameMisc_SD_size<bit u, bit size_high, bits<5> opcode,
4307 def ss : NeonI_Scalar2SameMisc_size<u, {size_high, 0b0}, opcode, asmop, FPR32,
4309 def dd : NeonI_Scalar2SameMisc_size<u, {size_high, 0b1}, opcode, asmop, FPR64,
4313 multiclass NeonI_Scalar2SameMisc_D_size<bit u, bits<5> opcode, string asmop> {
4314 def dd : NeonI_Scalar2SameMisc_size<u, 0b11, opcode, asmop, FPR64, FPR64>;
4317 multiclass NeonI_Scalar2SameMisc_BHSD_size<bit u, bits<5> opcode, string asmop>
4318 : NeonI_Scalar2SameMisc_D_size<u, opcode, asmop> {
4319 def bb : NeonI_Scalar2SameMisc_size<u, 0b00, opcode, asmop, FPR8, FPR8>;
4320 def hh : NeonI_Scalar2SameMisc_size<u, 0b01, opcode, asmop, FPR16, FPR16>;
4321 def ss : NeonI_Scalar2SameMisc_size<u, 0b10, opcode, asmop, FPR32, FPR32>;
// fcvtxn: double-to-single narrowing conversion (FPR64 -> FPR32).
4324 class NeonI_Scalar2SameMisc_fcvtxn_D_size<bit u, bits<5> opcode, string asmop>
4325 : NeonI_Scalar2SameMisc_size<u, 0b01, opcode, asmop, FPR32, FPR64>;
// Narrowing forms: destination register class one size below the source.
4327 multiclass NeonI_Scalar2SameMisc_narrow_HSD_size<bit u, bits<5> opcode,
4329 def bh : NeonI_Scalar2SameMisc_size<u, 0b00, opcode, asmop, FPR8, FPR16>;
4330 def hs : NeonI_Scalar2SameMisc_size<u, 0b01, opcode, asmop, FPR16, FPR32>;
4331 def sd : NeonI_Scalar2SameMisc_size<u, 0b10, opcode, asmop, FPR32, FPR64>;
// Accumulating forms (e.g. suqadd/usqadd): $Src is the running value,
// tied to $Rd in the BHSD multiclass below.
4334 class NeonI_Scalar2SameMisc_accum_size<bit u, bits<2> size, bits<5> opcode,
4335 string asmop, RegisterClass FPRC>
4336 : NeonI_Scalar2SameMisc<u, size, opcode,
4337 (outs FPRC:$Rd), (ins FPRC:$Src, FPRC:$Rn),
4338 !strconcat(asmop, "\t$Rd, $Rn"),
4342 multiclass NeonI_Scalar2SameMisc_accum_BHSD_size<bit u, bits<5> opcode,
4345 let Constraints = "$Src = $Rd" in {
4346 def bb : NeonI_Scalar2SameMisc_accum_size<u, 0b00, opcode, asmop, FPR8>;
4347 def hh : NeonI_Scalar2SameMisc_accum_size<u, 0b01, opcode, asmop, FPR16>;
4348 def ss : NeonI_Scalar2SameMisc_accum_size<u, 0b10, opcode, asmop, FPR32>;
4349 def dd : NeonI_Scalar2SameMisc_accum_size<u, 0b11, opcode, asmop, FPR64>;
// Selection patterns for the two-register misc instructions above.
4353 class Neon_Scalar2SameMisc_fcvtxn_D_size_patterns<SDPatternOperator opnode,
4355 : Pat<(f32 (opnode (f64 FPR64:$Rn))),
4358 multiclass Neon_Scalar2SameMisc_fcvt_SD_size_patterns<SDPatternOperator opnode,
4360 Instruction INSTD> {
4361 def : Pat<(v1i32 (opnode (f32 FPR32:$Rn))),
4363 def : Pat<(v1i64 (opnode (f64 FPR64:$Rn))),
4367 class Neon_Scalar2SameMisc_vcvt_D_size_patterns<SDPatternOperator opnode,
4369 : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn))),
4372 multiclass Neon_Scalar2SameMisc_cvt_SD_size_patterns<SDPatternOperator opnode,
4374 Instruction INSTD> {
4375 def : Pat<(f32 (opnode (v1i32 FPR32:$Rn))),
4377 def : Pat<(f64 (opnode (v1i64 FPR64:$Rn))),
4381 multiclass Neon_Scalar2SameMisc_SD_size_patterns<SDPatternOperator opnode,
4383 Instruction INSTD> {
4384 def : Pat<(f32 (opnode (f32 FPR32:$Rn))),
4386 def : Pat<(f64 (opnode (f64 FPR64:$Rn))),
4390 class Neon_Scalar2SameMisc_V1_D_size_patterns<SDPatternOperator opnode,
4392 : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))),
// Compare-against-zero forms: the immediate operand is constrained to 0
// (#0 for integer compares, #0.0 via fpz32 for FP compares).
4395 class NeonI_Scalar2SameMisc_cmpz_D_size<bit u, bits<5> opcode, string asmop>
4396 : NeonI_Scalar2SameMisc<u, 0b11, opcode,
4397 (outs FPR64:$Rd), (ins FPR64:$Rn, neon_uimm0:$Imm),
4398 !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
4402 multiclass NeonI_Scalar2SameMisc_cmpz_SD_size<bit u, bits<5> opcode,
4404 def ssi : NeonI_Scalar2SameMisc<u, 0b10, opcode,
4405 (outs FPR32:$Rd), (ins FPR32:$Rn, fpz32:$FPImm),
4406 !strconcat(asmop, "\t$Rd, $Rn, $FPImm"),
4409 def ddi : NeonI_Scalar2SameMisc<u, 0b11, opcode,
4410 (outs FPR64:$Rd), (ins FPR64:$Rn, fpz32:$FPImm),
4411 !strconcat(asmop, "\t$Rd, $Rn, $FPImm"),
4416 class Neon_Scalar2SameMisc_cmpz_D_size_patterns<SDPatternOperator opnode,
4418 : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn),
4419 (v1i64 (bitconvert (v8i8 Neon_AllZero))))),
4420 (INSTD FPR64:$Rn, 0)>;
4422 class Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<CondCode CC,
4424 : Pat<(v1i64 (Neon_cmpz (v1i64 FPR64:$Rn),
4425 (i32 neon_uimm0:$Imm), CC)),
4426 (INSTD FPR64:$Rn, neon_uimm0:$Imm)>;
4428 multiclass Neon_Scalar2SameMisc_cmpz_SD_size_patterns<SDPatternOperator opnode,
4431 Instruction INSTD> {
4432 def : Pat<(v1i32 (opnode (f32 FPR32:$Rn), (f32 fpz32:$FPImm))),
4433 (INSTS FPR32:$Rn, fpz32:$FPImm)>;
4434 def : Pat<(v1i64 (opnode (f64 FPR64:$Rn), (f32 fpz32:$FPImm))),
4435 (INSTD FPR64:$Rn, fpz32:$FPImm)>;
4436 def : Pat<(v1i64 (Neon_cmpz (v1f64 FPR64:$Rn), (f32 fpz32:$FPImm), CC)),
4437 (INSTD FPR64:$Rn, fpz32:$FPImm)>;
4440 multiclass Neon_Scalar2SameMisc_D_size_patterns<SDPatternOperator opnode,
4441 Instruction INSTD> {
4442 def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn))),
4446 multiclass Neon_Scalar2SameMisc_BHSD_size_patterns<SDPatternOperator opnode,
4451 : Neon_Scalar2SameMisc_D_size_patterns<opnode, INSTD> {
4452 def : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn))),
4454 def : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn))),
4456 def : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn))),
4460 multiclass Neon_Scalar2SameMisc_narrow_HSD_size_patterns<
4461 SDPatternOperator opnode,
4464 Instruction INSTD> {
4465 def : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn))),
4467 def : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn))),
4469 def : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn))),
4474 multiclass Neon_Scalar2SameMisc_accum_BHSD_size_patterns<
4475 SDPatternOperator opnode,
4479 Instruction INSTD> {
4480 def : Pat<(v1i8 (opnode (v1i8 FPR8:$Src), (v1i8 FPR8:$Rn))),
4481 (INSTB FPR8:$Src, FPR8:$Rn)>;
4482 def : Pat<(v1i16 (opnode (v1i16 FPR16:$Src), (v1i16 FPR16:$Rn))),
4483 (INSTH FPR16:$Src, FPR16:$Rn)>;
4484 def : Pat<(v1i32 (opnode (v1i32 FPR32:$Src), (v1i32 FPR32:$Rn))),
4485 (INSTS FPR32:$Src, FPR32:$Rn)>;
4486 def : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn))),
4487 (INSTD FPR64:$Src, FPR64:$Rn)>;
4490 // Scalar Shift By Immediate
4492 class NeonI_ScalarShiftImm_size<bit u, bits<5> opcode, string asmop,
4493 RegisterClass FPRC, Operand ImmTy>
4494 : NeonI_ScalarShiftImm<u, opcode,
4495 (outs FPRC:$Rd), (ins FPRC:$Rn, ImmTy:$Imm),
4496 !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
4499 multiclass NeonI_ScalarShiftRightImm_D_size<bit u, bits<5> opcode,
4501 def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shr_imm64> {
4503 let Inst{22} = 0b1; // immh:immb = 1xxxxxx
4504 let Inst{21-16} = Imm;
4508 multiclass NeonI_ScalarShiftRightImm_BHSD_size<bit u, bits<5> opcode,
4510 : NeonI_ScalarShiftRightImm_D_size<u, opcode, asmop> {
4511 def bbi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR8, shr_imm8> {
4513 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
4514 let Inst{18-16} = Imm;
4516 def hhi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR16, shr_imm16> {
4518 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
4519 let Inst{19-16} = Imm;
4521 def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shr_imm32> {
4523 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
4524 let Inst{20-16} = Imm;
4528 multiclass NeonI_ScalarShiftLeftImm_D_size<bit u, bits<5> opcode,
4530 def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shl_imm64> {
4532 let Inst{22} = 0b1; // immh:immb = 1xxxxxx
4533 let Inst{21-16} = Imm;
4537 multiclass NeonI_ScalarShiftLeftImm_BHSD_size<bit u, bits<5> opcode,
4539 : NeonI_ScalarShiftLeftImm_D_size<u, opcode, asmop> {
4540 def bbi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR8, shl_imm8> {
4542 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
4543 let Inst{18-16} = Imm;
4545 def hhi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR16, shl_imm16> {
4547 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
4548 let Inst{19-16} = Imm;
4550 def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shl_imm32> {
4552 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
4553 let Inst{20-16} = Imm;
4557 class NeonI_ScalarShiftRightImm_accum_D_size<bit u, bits<5> opcode, string asmop>
4558 : NeonI_ScalarShiftImm<u, opcode,
4560 (ins FPR64:$Src, FPR64:$Rn, shr_imm64:$Imm),
4561 !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
4564 let Inst{22} = 0b1; // immh:immb = 1xxxxxx
4565 let Inst{21-16} = Imm;
4566 let Constraints = "$Src = $Rd";
4569 class NeonI_ScalarShiftLeftImm_accum_D_size<bit u, bits<5> opcode, string asmop>
4570 : NeonI_ScalarShiftImm<u, opcode,
4572 (ins FPR64:$Src, FPR64:$Rn, shl_imm64:$Imm),
4573 !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
4576 let Inst{22} = 0b1; // immh:immb = 1xxxxxx
4577 let Inst{21-16} = Imm;
4578 let Constraints = "$Src = $Rd";
4581 class NeonI_ScalarShiftImm_narrow_size<bit u, bits<5> opcode, string asmop,
4582 RegisterClass FPRCD, RegisterClass FPRCS,
4584 : NeonI_ScalarShiftImm<u, opcode,
4585 (outs FPRCD:$Rd), (ins FPRCS:$Rn, ImmTy:$Imm),
4586 !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
4589 multiclass NeonI_ScalarShiftImm_narrow_HSD_size<bit u, bits<5> opcode,
4591 def bhi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR8, FPR16,
4594 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
4595 let Inst{18-16} = Imm;
4597 def hsi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR16, FPR32,
4600 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
4601 let Inst{19-16} = Imm;
4603 def sdi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR32, FPR64,
4606 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
4607 let Inst{20-16} = Imm;
4611 multiclass NeonI_ScalarShiftImm_cvt_SD_size<bit u, bits<5> opcode, string asmop> {
4612 def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shr_imm32> {
4614 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
4615 let Inst{20-16} = Imm;
4617 def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shr_imm64> {
4619 let Inst{22} = 0b1; // immh:immb = 1xxxxxx
4620 let Inst{21-16} = Imm;
4624 multiclass Neon_ScalarShiftRImm_D_size_patterns<SDPatternOperator opnode,
4625 Instruction INSTD> {
4626 def ddi : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
4627 (INSTD FPR64:$Rn, imm:$Imm)>;
4630 multiclass Neon_ScalarShiftLImm_D_size_patterns<SDPatternOperator opnode,
4631 Instruction INSTD> {
4632 def ddi : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (i32 shl_imm64:$Imm))),
4633 (INSTD FPR64:$Rn, imm:$Imm)>;
4636 class Neon_ScalarShiftImm_V1_D_size_patterns<SDPatternOperator opnode,
4638 : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn),
4639 (v1i64 (Neon_vdup (i32 shr_imm64:$Imm))))),
4640 (INSTD FPR64:$Rn, imm:$Imm)>;
4642 multiclass Neon_ScalarShiftLImm_BHSD_size_patterns<SDPatternOperator opnode,
4647 : Neon_ScalarShiftLImm_D_size_patterns<opnode, INSTD> {
4648 def bbi : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (i32 shl_imm8:$Imm))),
4649 (INSTB FPR8:$Rn, imm:$Imm)>;
4650 def hhi : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (i32 shl_imm16:$Imm))),
4651 (INSTH FPR16:$Rn, imm:$Imm)>;
4652 def ssi : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (i32 shl_imm32:$Imm))),
4653 (INSTS FPR32:$Rn, imm:$Imm)>;
4656 class Neon_ScalarShiftLImm_accum_D_size_patterns<SDPatternOperator opnode,
4658 : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn),
4659 (i32 shl_imm64:$Imm))),
4660 (INSTD FPR64:$Src, FPR64:$Rn, imm:$Imm)>;
4662 class Neon_ScalarShiftRImm_accum_D_size_patterns<SDPatternOperator opnode,
4664 : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn),
4665 (i32 shr_imm64:$Imm))),
4666 (INSTD FPR64:$Src, FPR64:$Rn, imm:$Imm)>;
4668 multiclass Neon_ScalarShiftImm_narrow_HSD_size_patterns<
4669 SDPatternOperator opnode,
4672 Instruction INSTD> {
4673 def bhi : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn), (i32 shr_imm16:$Imm))),
4674 (INSTH FPR16:$Rn, imm:$Imm)>;
4675 def hsi : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn), (i32 shr_imm32:$Imm))),
4676 (INSTS FPR32:$Rn, imm:$Imm)>;
4677 def sdi : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
4678 (INSTD FPR64:$Rn, imm:$Imm)>;
4681 multiclass Neon_ScalarShiftImm_scvtf_SD_size_patterns<SDPatternOperator opnode,
4683 Instruction INSTD> {
4684 def ssi : Pat<(f32 (opnode (v1i32 FPR32:$Rn), (i32 shr_imm32:$Imm))),
4685 (INSTS FPR32:$Rn, imm:$Imm)>;
4686 def ddi : Pat<(f64 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
4687 (INSTD FPR64:$Rn, imm:$Imm)>;
4690 multiclass Neon_ScalarShiftImm_fcvts_SD_size_patterns<SDPatternOperator opnode,
4692 Instruction INSTD> {
4693 def ssi : Pat<(v1i32 (opnode (f32 FPR32:$Rn), (i32 shr_imm32:$Imm))),
4694 (INSTS FPR32:$Rn, imm:$Imm)>;
4695 def ddi : Pat<(v1i64 (opnode (f64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
4696 (INSTD FPR64:$Rn, imm:$Imm)>;
4699 // Scalar Signed Shift Right (Immediate)
4700 defm SSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00000, "sshr">;
4701 defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vshrds_n, SSHRddi>;
4702 // Also select a generic sra of a splatted immediate (Neon_vdup) to SSHR.
4703 def : Neon_ScalarShiftImm_V1_D_size_patterns<sra, SSHRddi>;
4705 // Scalar Unsigned Shift Right (Immediate)
4706 defm USHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00000, "ushr">;
4707 defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vshrdu_n, USHRddi>;
4708 // Also select a generic srl of a splatted immediate (Neon_vdup) to USHR.
4709 def : Neon_ScalarShiftImm_V1_D_size_patterns<srl, USHRddi>;
4711 // Scalar Signed Rounding Shift Right (Immediate)
4712 defm SRSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00100, "srshr">;
4713 defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vsrshr, SRSHRddi>;
4715 // Scalar Unsigned Rounding Shift Right (Immediate)
4716 defm URSHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00100, "urshr">;
4717 defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vurshr, URSHRddi>;
// Shift-right-and-accumulate forms. The instruction class they instantiate
// ties the accumulator input to the destination ("$Src = $Rd" constraint).
4719 // Scalar Signed Shift Right and Accumulate (Immediate)
4720 def SSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b0, 0b00010, "ssra">;
4721 def : Neon_ScalarShiftRImm_accum_D_size_patterns
4722 <int_aarch64_neon_vsrads_n, SSRA>;
4724 // Scalar Unsigned Shift Right and Accumulate (Immediate)
4725 def USRA : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b00010, "usra">;
4726 def : Neon_ScalarShiftRImm_accum_D_size_patterns
4727 <int_aarch64_neon_vsradu_n, USRA>;
4729 // Scalar Signed Rounding Shift Right and Accumulate (Immediate)
4730 def SRSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b0, 0b00110, "srsra">;
4731 def : Neon_ScalarShiftRImm_accum_D_size_patterns
4732 <int_aarch64_neon_vrsrads_n, SRSRA>;
4734 // Scalar Unsigned Rounding Shift Right and Accumulate (Immediate)
4735 def URSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b00110, "ursra">;
4736 def : Neon_ScalarShiftRImm_accum_D_size_patterns
4737 <int_aarch64_neon_vrsradu_n, URSRA>;
4739 // Scalar Shift Left (Immediate)
4740 defm SHL : NeonI_ScalarShiftLeftImm_D_size<0b0, 0b01010, "shl">;
4741 defm : Neon_ScalarShiftLImm_D_size_patterns<int_aarch64_neon_vshld_n, SHLddi>;
4742 // Pattern to match llvm.arm.* intrinsic.
4743 def : Neon_ScalarShiftImm_V1_D_size_patterns<shl, SHLddi>;
4745 // Signed Saturating Shift Left (Immediate)
4746 defm SQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b0, 0b01110, "sqshl">;
4747 defm : Neon_ScalarShiftLImm_BHSD_size_patterns<int_aarch64_neon_vqshls_n,
4749 SQSHLssi, SQSHLddi>;
4750 // Pattern to match llvm.arm.* intrinsic.
4751 defm : Neon_ScalarShiftLImm_D_size_patterns<Neon_sqrshlImm, SQSHLddi>;
4753 // Unsigned Saturating Shift Left (Immediate)
4754 defm UQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01110, "uqshl">;
4755 defm : Neon_ScalarShiftLImm_BHSD_size_patterns<int_aarch64_neon_vqshlu_n,
4757 UQSHLssi, UQSHLddi>;
4758 // Pattern to match llvm.arm.* intrinsic.
4759 defm : Neon_ScalarShiftLImm_D_size_patterns<Neon_uqrshlImm, UQSHLddi>;
4761 // Signed Saturating Shift Left Unsigned (Immediate)
4762 defm SQSHLU : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01100, "sqshlu">;
4763 defm : Neon_ScalarShiftLImm_BHSD_size_patterns<int_aarch64_neon_vsqshlu,
4764 SQSHLUbbi, SQSHLUhhi,
4765 SQSHLUssi, SQSHLUddi>;
4767 // Shift Right And Insert (Immediate)
4768 def SRI : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b01000, "sri">;
4769 def : Neon_ScalarShiftRImm_accum_D_size_patterns
4770 <int_aarch64_neon_vsri, SRI>;
4772 // Shift Left And Insert (Immediate)
4773 def SLI : NeonI_ScalarShiftLeftImm_accum_D_size<0b1, 0b01010, "sli">;
4774 def : Neon_ScalarShiftLImm_accum_D_size_patterns
4775 <int_aarch64_neon_vsli, SLI>;
4777 // Signed Saturating Shift Right Narrow (Immediate)
4778 defm SQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10010, "sqshrn">;
4779 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqshrn,
4780 SQSHRNbhi, SQSHRNhsi,
4783 // Unsigned Saturating Shift Right Narrow (Immediate)
4784 defm UQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10010, "uqshrn">;
4785 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vuqshrn,
4786 UQSHRNbhi, UQSHRNhsi,
4789 // Signed Saturating Rounded Shift Right Narrow (Immediate)
4790 defm SQRSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10011, "sqrshrn">;
4791 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqrshrn,
4792 SQRSHRNbhi, SQRSHRNhsi,
4795 // Unsigned Saturating Rounded Shift Right Narrow (Immediate)
4796 defm UQRSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10011, "uqrshrn">;
4797 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vuqrshrn,
4798 UQRSHRNbhi, UQRSHRNhsi,
4801 // Signed Saturating Shift Right Unsigned Narrow (Immediate)
4802 defm SQSHRUN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10000, "sqshrun">;
4803 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqshrun,
4804 SQSHRUNbhi, SQSHRUNhsi,
4807 // Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate)
4808 defm SQRSHRUN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10001, "sqrshrun">;
4809 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqrshrun,
4810 SQRSHRUNbhi, SQRSHRUNhsi,
// Fixed-point <-> floating-point converts. The "_N" variants carry an
// explicit immediate operand (shr_imm32/shr_imm64 in the patterns above).
4813 // Scalar Signed Fixed-point Convert To Floating-Point (Immediate)
4814 defm SCVTF_N : NeonI_ScalarShiftImm_cvt_SD_size<0b0, 0b11100, "scvtf">;
4815 defm : Neon_ScalarShiftImm_scvtf_SD_size_patterns<int_aarch64_neon_vcvtfxs2fp_n,
4816 SCVTF_Nssi, SCVTF_Nddi>;
4818 // Scalar Unsigned Fixed-point Convert To Floating-Point (Immediate)
4819 defm UCVTF_N : NeonI_ScalarShiftImm_cvt_SD_size<0b1, 0b11100, "ucvtf">;
4820 defm : Neon_ScalarShiftImm_scvtf_SD_size_patterns<int_aarch64_neon_vcvtfxu2fp_n,
4821 UCVTF_Nssi, UCVTF_Nddi>;
4823 // Scalar Floating-point Convert To Signed Fixed-point (Immediate)
4824 defm FCVTZS_N : NeonI_ScalarShiftImm_cvt_SD_size<0b0, 0b11111, "fcvtzs">;
4825 defm : Neon_ScalarShiftImm_fcvts_SD_size_patterns<int_aarch64_neon_vcvtfp2fxs_n,
4826 FCVTZS_Nssi, FCVTZS_Nddi>;
4828 // Scalar Floating-point Convert To Unsigned Fixed-point (Immediate)
4829 defm FCVTZU_N : NeonI_ScalarShiftImm_cvt_SD_size<0b1, 0b11111, "fcvtzu">;
4830 defm : Neon_ScalarShiftImm_fcvts_SD_size_patterns<int_aarch64_neon_vcvtfp2fxu_n,
4831 FCVTZU_Nssi, FCVTZU_Nddi>;
4833 // Patterns For Convert Instructions Between v1f64 and v1i64
4834 class Neon_ScalarShiftImm_cvtf_v1f64_pattern<SDPatternOperator opnode,
4836 : Pat<(v1f64 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
4837 (INST FPR64:$Rn, imm:$Imm)>;
4839 class Neon_ScalarShiftImm_fcvt_v1f64_pattern<SDPatternOperator opnode,
4841 : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
4842 (INST FPR64:$Rn, imm:$Imm)>;
4844 def : Neon_ScalarShiftImm_cvtf_v1f64_pattern<int_arm_neon_vcvtfxs2fp,
4847 def : Neon_ScalarShiftImm_cvtf_v1f64_pattern<int_arm_neon_vcvtfxu2fp,
4850 def : Neon_ScalarShiftImm_fcvt_v1f64_pattern<int_arm_neon_vcvtfp2fxs,
4853 def : Neon_ScalarShiftImm_fcvt_v1f64_pattern<int_arm_neon_vcvtfp2fxu,
4856 // Scalar Integer Add
4857 let isCommutable = 1 in {
4858 def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">;
4861 // Scalar Integer Sub
4862 def SUBddd : NeonI_Scalar3Same_D_size<0b1, 0b10000, "sub">;
4864 // Pattern for Scalar Integer Add and Sub with D register only
4865 defm : Neon_Scalar3Same_D_size_patterns<add, ADDddd>;
4866 defm : Neon_Scalar3Same_D_size_patterns<sub, SUBddd>;
4868 // Patterns to match llvm.aarch64.* intrinsic for Scalar Add, Sub
4869 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vaddds, ADDddd>;
4870 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vadddu, ADDddd>;
4871 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vsubds, SUBddd>;
4872 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vsubdu, SUBddd>;
4874 // Scalar Integer Saturating Add (Signed, Unsigned)
4875 defm SQADD : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00001, "sqadd", 1>;
4876 defm UQADD : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00001, "uqadd", 1>;
4878 // Scalar Integer Saturating Sub (Signed, Unsigned)
4879 defm SQSUB : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00101, "sqsub", 0>;
4880 defm UQSUB : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00101, "uqsub", 0>;
4883 // Patterns to match llvm.aarch64.* intrinsic for
4884 // Scalar Integer Saturating Add, Sub (Signed, Unsigned)
4885 defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqadds, SQADDbbb,
4886 SQADDhhh, SQADDsss, SQADDddd>;
4887 defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqaddu, UQADDbbb,
4888 UQADDhhh, UQADDsss, UQADDddd>;
4889 defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqsubs, SQSUBbbb,
4890 SQSUBhhh, SQSUBsss, SQSUBddd>;
4891 defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqsubu, UQSUBbbb,
4892 UQSUBhhh, UQSUBsss, UQSUBddd>;
4894 // Scalar Integer Saturating Doubling Multiply Half High
4895 defm SQDMULH : NeonI_Scalar3Same_HS_sizes<0b0, 0b10110, "sqdmulh", 1>;
4897 // Scalar Integer Saturating Rounding Doubling Multiply Half High
4898 defm SQRDMULH : NeonI_Scalar3Same_HS_sizes<0b1, 0b10110, "sqrdmulh", 1>;
4900 // Patterns to match llvm.arm.* intrinsic for
4901 // Scalar Integer Saturating Doubling Multiply Half High and
4902 // Scalar Integer Saturating Rounding Doubling Multiply Half High
4903 defm : Neon_Scalar3Same_HS_size_patterns<int_arm_neon_vqdmulh, SQDMULHhhh,
4905 defm : Neon_Scalar3Same_HS_size_patterns<int_arm_neon_vqrdmulh, SQRDMULHhhh,
4908 // Scalar Floating-point Multiply Extended
4909 defm FMULX : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11011, "fmulx", 1>;
4911 // Scalar Floating-point Reciprocal Step
4912 defm FRECPS : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11111, "frecps", 0>;
4913 defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_vrecps, f32, f32,
4914 FRECPSsss, f64, f64, FRECPSddd>;
// The llvm.arm.neon.vrecps intrinsic on v1f64 maps to the D-register form.
4915 def : Pat<(v1f64 (int_arm_neon_vrecps (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
4916 (FRECPSddd FPR64:$Rn, FPR64:$Rm)>;
4918 // Scalar Floating-point Reciprocal Square Root Step
4919 defm FRSQRTS : NeonI_Scalar3Same_SD_sizes<0b0, 0b1, 0b11111, "frsqrts", 0>;
4920 defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_vrsqrts, f32, f32,
4921 FRSQRTSsss, f64, f64, FRSQRTSddd>;
4922 def : Pat<(v1f64 (int_arm_neon_vrsqrts (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
4923 (FRSQRTSddd FPR64:$Rn, FPR64:$Rm)>;
// v1f64 fsqrt is selected to the scalar FP square-root instruction.
4924 def : Pat<(v1f64 (fsqrt (v1f64 FPR64:$Rn))), (FSQRTdd FPR64:$Rn)>;
4926 // Patterns to match llvm.aarch64.* intrinsic for
4927 // Scalar Floating-point Multiply Extended,
4928 multiclass Neon_Scalar3Same_MULX_SD_size_patterns<SDPatternOperator opnode,
4930 Instruction INSTD> {
4931 def : Pat<(f32 (opnode (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
4932 (INSTS FPR32:$Rn, FPR32:$Rm)>;
4933 def : Pat<(f64 (opnode (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
4934 (INSTD FPR64:$Rn, FPR64:$Rm)>;
4937 defm : Neon_Scalar3Same_MULX_SD_size_patterns<int_aarch64_neon_vmulx,
4938 FMULXsss, FMULXddd>;
4939 def : Pat<(v1f64 (int_aarch64_neon_vmulx (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
4940 (FMULXddd FPR64:$Rn, FPR64:$Rm)>;
// Scalar shift-by-register instructions (shift amount taken from a register,
// unlike the shift-by-immediate group above).
4942 // Scalar Integer Shift Left (Signed, Unsigned)
4943 def SSHLddd : NeonI_Scalar3Same_D_size<0b0, 0b01000, "sshl">;
4944 def USHLddd : NeonI_Scalar3Same_D_size<0b1, 0b01000, "ushl">;
4946 // Patterns to match llvm.arm.* intrinsic for
4947 // Scalar Integer Shift Left (Signed, Unsigned)
4948 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vshifts, SSHLddd>;
4949 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vshiftu, USHLddd>;
4951 // Patterns to match llvm.aarch64.* intrinsic for
4952 // Scalar Integer Shift Left (Signed, Unsigned)
4953 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vshlds, SSHLddd>;
4954 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vshldu, USHLddd>;
4956 // Scalar Integer Saturating Shift Left (Signed, Unsigned)
4957 defm SQSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01001, "sqshl", 0>;
4958 defm UQSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01001, "uqshl", 0>;
4960 // Patterns to match llvm.aarch64.* intrinsic for
4961 // Scalar Integer Saturating Shift Left (Signed, Unsigned)
4962 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqshls, SQSHLbbb,
4963 SQSHLhhh, SQSHLsss, SQSHLddd>;
4964 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqshlu, UQSHLbbb,
4965 UQSHLhhh, UQSHLsss, UQSHLddd>;
4967 // Patterns to match llvm.arm.* intrinsic for
4968 // Scalar Integer Saturating Shift Left (Signed, Unsigned)
4969 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqshifts, SQSHLddd>;
4970 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqshiftu, UQSHLddd>;
4972 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
4973 def SRSHLddd: NeonI_Scalar3Same_D_size<0b0, 0b01010, "srshl">;
4974 def URSHLddd: NeonI_Scalar3Same_D_size<0b1, 0b01010, "urshl">;
4976 // Patterns to match llvm.aarch64.* intrinsic for
4977 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
4978 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vrshlds, SRSHLddd>;
4979 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vrshldu, URSHLddd>;
4981 // Patterns to match llvm.arm.* intrinsic for
4982 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
4983 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vrshifts, SRSHLddd>;
4984 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vrshiftu, URSHLddd>;
4986 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
4987 defm SQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01011, "sqrshl", 0>;
4988 defm UQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01011, "uqrshl", 0>;
4990 // Patterns to match llvm.aarch64.* intrinsic for
4991 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
4992 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqrshls, SQRSHLbbb,
4993 SQRSHLhhh, SQRSHLsss, SQRSHLddd>;
4994 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqrshlu, UQRSHLbbb,
4995 UQRSHLhhh, UQRSHLsss, UQRSHLddd>;
4997 // Patterns to match llvm.arm.* intrinsic for
4998 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
4999 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshifts, SQRSHLddd>;
5000 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshiftu, UQRSHLddd>;
// Widening ("long") saturating doubling multiplies. Per this file's naming
// convention the suffix encodes result/operand sizes, e.g. SQDMLALshh is an
// S-sized result from H-sized operands, SQDMLALdss a D result from S operands.
5002 // Signed Saturating Doubling Multiply-Add Long
5003 defm SQDMLAL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1001, "sqdmlal">;
5004 defm : Neon_Scalar3Diff_ml_HS_size_patterns<int_aarch64_neon_vqdmlal,
5005 SQDMLALshh, SQDMLALdss>;
5007 // Signed Saturating Doubling Multiply-Subtract Long
5008 defm SQDMLSL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1011, "sqdmlsl">;
5009 defm : Neon_Scalar3Diff_ml_HS_size_patterns<int_aarch64_neon_vqdmlsl,
5010 SQDMLSLshh, SQDMLSLdss>;
5012 // Signed Saturating Doubling Multiply Long
5013 defm SQDMULL : NeonI_Scalar3Diff_HS_size<0b0, 0b1101, "sqdmull">;
5014 defm : Neon_Scalar3Diff_HS_size_patterns<int_arm_neon_vqdmull,
5015 SQDMULLshh, SQDMULLdss>;
5017 // Scalar Signed Integer Convert To Floating-point
5018 defm SCVTF : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11101, "scvtf">;
5019 defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns<int_aarch64_neon_vcvtint2fps,
5022 // Scalar Unsigned Integer Convert To Floating-point
5023 defm UCVTF : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11101, "ucvtf">;
5024 defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns<int_aarch64_neon_vcvtint2fpu,
5027 // Scalar Floating-point Converts
5028 def FCVTXN : NeonI_Scalar2SameMisc_fcvtxn_D_size<0b1, 0b10110, "fcvtxn">;
5029 def : Neon_Scalar2SameMisc_fcvtxn_D_size_patterns<int_aarch64_neon_fcvtxn,
// FP -> integer converts. Mnemonic suffixes follow the A64 convention:
// N/M/A/P select the rounding mode (to nearest ties-even, toward -inf,
// to nearest ties-away, toward +inf) and the final S/U selects a
// signed/unsigned result. Each gets an llvm.aarch64.* pattern for the
// S/D register forms plus an llvm.arm.* pattern for the v1i64 form.
5032 defm FCVTNS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11010, "fcvtns">;
5033 defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtns,
5034 FCVTNSss, FCVTNSdd>;
5035 def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtns, FCVTNSdd>;
5037 defm FCVTNU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11010, "fcvtnu">;
5038 defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtnu,
5039 FCVTNUss, FCVTNUdd>;
5040 def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtnu, FCVTNUdd>;
5042 defm FCVTMS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11011, "fcvtms">;
5043 defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtms,
5044 FCVTMSss, FCVTMSdd>;
5045 def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtms, FCVTMSdd>;
5047 defm FCVTMU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11011, "fcvtmu">;
5048 defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtmu,
5049 FCVTMUss, FCVTMUdd>;
5050 def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtmu, FCVTMUdd>;
5052 defm FCVTAS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11100, "fcvtas">;
5053 defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtas,
5054 FCVTASss, FCVTASdd>;
5055 def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtas, FCVTASdd>;
5057 defm FCVTAU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11100, "fcvtau">;
5058 defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtau,
5059 FCVTAUss, FCVTAUdd>;
5060 def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtau, FCVTAUdd>;
5062 defm FCVTPS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11010, "fcvtps">;
5063 defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtps,
5064 FCVTPSss, FCVTPSdd>;
5065 def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtps, FCVTPSdd>;
5067 defm FCVTPU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11010, "fcvtpu">;
5068 defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtpu,
5069 FCVTPUss, FCVTPUdd>;
5070 def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtpu, FCVTPUdd>;
5072 defm FCVTZS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11011, "fcvtzs">;
5073 defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtzs,
5074 FCVTZSss, FCVTZSdd>;
5075 def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_aarch64_neon_vcvtzs,
5078 defm FCVTZU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11011, "fcvtzu">;
5079 defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtzu,
5080 FCVTZUss, FCVTZUdd>;
5081 def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_aarch64_neon_vcvtzu,
5084 // Patterns For Convert Instructions Between v1f64 and v1i64
5085 class Neon_Scalar2SameMisc_cvtf_v1f64_pattern<SDPatternOperator opnode,
5087 : Pat<(v1f64 (opnode (v1i64 FPR64:$Rn))), (INST FPR64:$Rn)>;
5089 class Neon_Scalar2SameMisc_fcvt_v1f64_pattern<SDPatternOperator opnode,
5091 : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;
5093 def : Neon_Scalar2SameMisc_cvtf_v1f64_pattern<sint_to_fp, SCVTFdd>;
5094 def : Neon_Scalar2SameMisc_cvtf_v1f64_pattern<uint_to_fp, UCVTFdd>;
5096 def : Neon_Scalar2SameMisc_fcvt_v1f64_pattern<fp_to_sint, FCVTZSdd>;
5097 def : Neon_Scalar2SameMisc_fcvt_v1f64_pattern<fp_to_uint, FCVTZUdd>;
5099 // Scalar Floating-point Reciprocal Estimate
5100 defm FRECPE : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11101, "frecpe">;
5101 defm : Neon_Scalar2SameMisc_SD_size_patterns<int_aarch64_neon_vrecpe,
5102 FRECPEss, FRECPEdd>;
5103 def : Neon_Scalar2SameMisc_V1_D_size_patterns<int_arm_neon_vrecpe,
5106 // Scalar Floating-point Reciprocal Exponent
5107 defm FRECPX : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11111, "frecpx">;
5108 defm : Neon_Scalar2SameMisc_SD_size_patterns<int_aarch64_neon_vrecpx,
5109 FRECPXss, FRECPXdd>;
5111 // Scalar Floating-point Reciprocal Square Root Estimate
5112 defm FRSQRTE: NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11101, "frsqrte">;
5113 defm : Neon_Scalar2SameMisc_SD_size_patterns<int_aarch64_neon_vrsqrte,
5114 FRSQRTEss, FRSQRTEdd>;
5115 def : Neon_Scalar2SameMisc_V1_D_size_patterns<int_arm_neon_vrsqrte,
5118 // Scalar Floating-point Round
// Selects a unary v1f64 rounding DAG node to the corresponding scalar
// FRINT* instruction.
5119 class Neon_ScalarFloatRound_pattern<SDPatternOperator opnode, Instruction INST>
5120 : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;
5122 def : Neon_ScalarFloatRound_pattern<fceil, FRINTPdd>;
5123 def : Neon_ScalarFloatRound_pattern<ffloor, FRINTMdd>;
5124 def : Neon_ScalarFloatRound_pattern<ftrunc, FRINTZdd>;
5125 def : Neon_ScalarFloatRound_pattern<frint, FRINTXdd>;
5126 def : Neon_ScalarFloatRound_pattern<fnearbyint, FRINTIdd>;
5127 def : Neon_ScalarFloatRound_pattern<frnd, FRINTAdd>;
5128 def : Neon_ScalarFloatRound_pattern<int_aarch64_neon_frintn, FRINTNdd>;
5130 // Scalar Integer Compare
5132 // Scalar Compare Bitwise Equal
5133 def CMEQddd: NeonI_Scalar3Same_D_size<0b1, 0b10001, "cmeq">;
5134 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vceq, CMEQddd>;
// Selects a Neon_cmp node with the given condition code to the D-register
// compare instruction.
5136 class Neon_Scalar3Same_cmp_D_size_v1_patterns<SDPatternOperator opnode,
5139 : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm), CC)),
5140 (INSTD FPR64:$Rn, FPR64:$Rm)>;
5142 def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMEQddd, SETEQ>;
5144 // Scalar Compare Signed Greater Than Or Equal
5145 def CMGEddd: NeonI_Scalar3Same_D_size<0b0, 0b00111, "cmge">;
5146 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vcge, CMGEddd>;
5147 def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMGEddd, SETGE>;
5149 // Scalar Compare Unsigned Higher Or Same
5150 def CMHSddd: NeonI_Scalar3Same_D_size<0b1, 0b00111, "cmhs">;
5151 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vchs, CMHSddd>;
5152 def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMHSddd, SETUGE>;
5154 // Scalar Compare Unsigned Higher
5155 def CMHIddd: NeonI_Scalar3Same_D_size<0b1, 0b00110, "cmhi">;
5156 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vchi, CMHIddd>;
5157 def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMHIddd, SETUGT>;
5159 // Scalar Compare Signed Greater Than
5160 def CMGTddd: NeonI_Scalar3Same_D_size<0b0, 0b00110, "cmgt">;
5161 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vcgt, CMGTddd>;
5162 def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMGTddd, SETGT>;
5164 // Scalar Compare Bitwise Test Bits
5165 def CMTSTddd: NeonI_Scalar3Same_D_size<0b0, 0b10001, "cmtst">;
5166 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vtstd, CMTSTddd>;
5167 defm : Neon_Scalar3Same_D_size_patterns<Neon_tst, CMTSTddd>;
5169 // Scalar Compare Bitwise Equal To Zero
5170 def CMEQddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01001, "cmeq">;
5171 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vceq,
5173 def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETEQ, CMEQddi>;
5175 // Scalar Compare Signed Greater Than Or Equal To Zero
5176 def CMGEddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b1, 0b01000, "cmge">;
5177 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcge,
5179 def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETGE, CMGEddi>;
5181 // Scalar Compare Signed Greater Than Zero
5182 def CMGTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01000, "cmgt">;
5183 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcgt,
5185 def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETGT, CMGTddi>;
5187 // Scalar Compare Signed Less Than Or Equal To Zero
5188 def CMLEddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b1, 0b01001, "cmle">;
5189 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vclez,
5191 def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETLE, CMLEddi>;
5193 // Scalar Compare Less Than Zero
5194 def CMLTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01010, "cmlt">;
5195 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcltz,
5197 def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETLT, CMLTddi>;
5199 // Scalar Floating-point Compare
// Each compare comes in a register-register form (Scalar3Same) plus a
// compare-against-zero form (Scalar2SameMisc_cmpz); patterns map both the
// llvm.aarch64.* intrinsics and generic SETcc condition codes onto them.
5201 // Scalar Floating-point Compare Mask Equal
5202 defm FCMEQ: NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11100, "fcmeq">;
5203 defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fceq, v1i32, f32,
5204 FCMEQsss, v1i64, f64, FCMEQddd>;
5205 def : Neon_Scalar3Same_cmp_V1_D_size_patterns<SETEQ, FCMEQddd>;
5207 // Scalar Floating-point Compare Mask Equal To Zero
5208 defm FCMEQZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01101, "fcmeq">;
5209 defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fceq, SETEQ,
5210 FCMEQZssi, FCMEQZddi>;
5212 // Scalar Floating-point Compare Mask Greater Than Or Equal
5213 defm FCMGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11100, "fcmge">;
5214 defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fcge, v1i32, f32,
5215 FCMGEsss, v1i64, f64, FCMGEddd>;
5216 def : Neon_Scalar3Same_cmp_V1_D_size_patterns<SETGE, FCMGEddd>;
5218 // Scalar Floating-point Compare Mask Greater Than Or Equal To Zero
5219 defm FCMGEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01100, "fcmge">;
5220 defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fcge, SETGE,
5221 FCMGEZssi, FCMGEZddi>;
5223 // Scalar Floating-point Compare Mask Greater Than
5224 defm FCMGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11100, "fcmgt">;
5225 defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fcgt, v1i32, f32,
5226 FCMGTsss, v1i64, f64, FCMGTddd>;
5227 def : Neon_Scalar3Same_cmp_V1_D_size_patterns<SETGT, FCMGTddd>;
5229 // Scalar Floating-point Compare Mask Greater Than Zero
5230 defm FCMGTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01100, "fcmgt">;
5231 defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fcgt, SETGT,
5232 FCMGTZssi, FCMGTZddi>;
// LT/LE have no register-register form here; only compare-against-zero
// encodings exist for them (fcmle/fcmlt with a #0.0 operand).
5234 // Scalar Floating-point Compare Mask Less Than Or Equal To Zero
5235 defm FCMLEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01101, "fcmle">;
5236 defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fclez, SETLE,
5237 FCMLEZssi, FCMLEZddi>;
5239 // Scalar Floating-point Compare Mask Less Than Zero
5240 defm FCMLTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01110, "fcmlt">;
5241 defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fcltz, SETLT,
5242 FCMLTZssi, FCMLTZddi>;
5244 // Scalar Floating-point Absolute Compare Mask Greater Than Or Equal
5245 defm FACGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11101, "facge">;
5246 defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fcage, v1i32, f32,
5247 FACGEsss, v1i64, f64, FACGEddd>;
// v1f64 form of the absolute-compare intrinsic maps to the D-register
// instruction directly.
5248 def : Pat<(v1i64 (int_aarch64_neon_vcage (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
5249 (FACGEddd FPR64:$Rn, FPR64:$Rm)>;
5251 // Scalar Floating-point Absolute Compare Mask Greater Than
5252 defm FACGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11101, "facgt">;
5253 defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fcagt, v1i32, f32,
5254 FACGTsss, v1i64, f64, FACGTddd>;
5255 def : Pat<(v1i64 (int_aarch64_neon_vcagt (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
5256 (FACGTddd FPR64:$Rn, FPR64:$Rm)>;
5258 // Scalar Floating-point Absolute Difference
5259 defm FABD: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11010, "fabd">;
5260 defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_vabd, f32, f32,
5261 FABDsss, f64, f64, FABDddd>;
5263 // Scalar Absolute Value
5264 defm ABS : NeonI_Scalar2SameMisc_D_size<0b0, 0b01011, "abs">;
5265 defm : Neon_Scalar2SameMisc_D_size_patterns<int_aarch64_neon_vabs, ABSdd>;
5267 // Scalar Signed Saturating Absolute Value
5268 defm SQABS : NeonI_Scalar2SameMisc_BHSD_size<0b0, 0b00111, "sqabs">;
5269 defm : Neon_Scalar2SameMisc_BHSD_size_patterns<int_arm_neon_vqabs,
5270 SQABSbb, SQABShh, SQABSss, SQABSdd>;
// Scalar Negate
5273 defm NEG : NeonI_Scalar2SameMisc_D_size<0b1, 0b01011, "neg">;
5274 defm : Neon_Scalar2SameMisc_D_size_patterns<int_aarch64_neon_vneg, NEGdd>;
5276 // Scalar Signed Saturating Negate
5277 defm SQNEG : NeonI_Scalar2SameMisc_BHSD_size<0b1, 0b00111, "sqneg">;
5278 defm : Neon_Scalar2SameMisc_BHSD_size_patterns<int_arm_neon_vqneg,
5279 SQNEGbb, SQNEGhh, SQNEGss, SQNEGdd>;
5281 // Scalar Signed Saturating Accumulated of Unsigned Value
5282 defm SUQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b0, 0b00011, "suqadd">;
5283 defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns<int_aarch64_neon_vuqadd,
5285 SUQADDss, SUQADDdd>;
5287 // Scalar Unsigned Saturating Accumulated of Signed Value
5288 defm USQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b1, 0b00011, "usqadd">;
5289 defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns<int_aarch64_neon_vsqadd,
5291 USQADDss, USQADDdd>;
// v1i64 forms of the saturating-accumulate intrinsics select the D-register
// SUQADD/USQADD; $Src is the tied accumulator operand.
5293 def : Pat<(v1i64 (int_aarch64_neon_suqadd (v1i64 FPR64:$Src),
5294 (v1i64 FPR64:$Rn))),
5295 (SUQADDdd FPR64:$Src, FPR64:$Rn)>;
5297 def : Pat<(v1i64 (int_aarch64_neon_usqadd (v1i64 FPR64:$Src),
5298 (v1i64 FPR64:$Rn))),
5299 (USQADDdd FPR64:$Src, FPR64:$Rn)>;
5301 def : Pat<(v1i64 (int_arm_neon_vabs (v1i64 FPR64:$Rn))),
5304 def : Pat<(v1i64 (int_arm_neon_vqabs (v1i64 FPR64:$Rn))),
5305 (SQABSdd FPR64:$Rn)>;
5307 def : Pat<(v1i64 (int_arm_neon_vqneg (v1i64 FPR64:$Rn))),
5308 (SQNEGdd FPR64:$Rn)>;
5310 def : Pat<(v1i64 (sub (v1i64 (bitconvert (v8i8 Neon_AllZero))),
5311 (v1i64 FPR64:$Rn))),
5314 // Scalar Signed Saturating Extract Unsigned Narrow
5315 defm SQXTUN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10010, "sqxtun">;
5316 defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovnsu,
5320 // Scalar Signed Saturating Extract Narrow
5321 defm SQXTN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b0, 0b10100, "sqxtn">;
5322 defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovns,
5326 // Scalar Unsigned Saturating Extract Narrow
5327 defm UQXTN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10100, "uqxtn">;
5328 defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovnu,
5332 // Scalar Reduce Pairwise
5334 multiclass NeonI_ScalarPair_D_sizes<bit u, bit size, bits<5> opcode,
5335 string asmop, bit Commutable = 0> {
5336 let isCommutable = Commutable in {
5337 def _D_2D : NeonI_ScalarPair<u, {size, 0b1}, opcode,
5338 (outs FPR64:$Rd), (ins VPR128:$Rn),
5339 !strconcat(asmop, "\t$Rd, $Rn.2d"),
5345 multiclass NeonI_ScalarPair_SD_sizes<bit u, bit size, bits<5> opcode,
5346 string asmop, bit Commutable = 0>
5347 : NeonI_ScalarPair_D_sizes<u, size, opcode, asmop, Commutable> {
5348 let isCommutable = Commutable in {
5349 def _S_2S : NeonI_ScalarPair<u, {size, 0b0}, opcode,
5350 (outs FPR32:$Rd), (ins VPR64:$Rn),
5351 !strconcat(asmop, "\t$Rd, $Rn.2s"),
5357 // Scalar Reduce Addition Pairwise (Integer) with
5358 // Pattern to match llvm.arm.* intrinsic
5359 defm ADDPvv : NeonI_ScalarPair_D_sizes<0b0, 0b1, 0b11011, "addp", 0>;
5361 // Pattern to match llvm.aarch64.* intrinsic for
5362 // Scalar Reduce Addition Pairwise (Integer)
5363 def : Pat<(v1i64 (int_aarch64_neon_vpadd (v2i64 VPR128:$Rn))),
5364 (ADDPvv_D_2D VPR128:$Rn)>;
5365 def : Pat<(v1i64 (int_aarch64_neon_vaddv (v2i64 VPR128:$Rn))),
5366 (ADDPvv_D_2D VPR128:$Rn)>;
5368 // Scalar Reduce Addition Pairwise (Floating Point)
5369 defm FADDPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01101, "faddp", 0>;
5371 // Scalar Reduce Maximum Pairwise (Floating Point)
5372 defm FMAXPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01111, "fmaxp", 0>;
5374 // Scalar Reduce Minimum Pairwise (Floating Point)
5375 defm FMINPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01111, "fminp", 0>;
5377 // Scalar Reduce maxNum Pairwise (Floating Point)
5378 defm FMAXNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01100, "fmaxnmp", 0>;
5380 // Scalar Reduce minNum Pairwise (Floating Point)
5381 defm FMINNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01100, "fminnmp", 0>;
5383 multiclass Neon_ScalarPair_SD_size_patterns<SDPatternOperator opnode,
5385 Instruction INSTD> {
5386 def : Pat<(f32 (opnode (v2f32 VPR64:$Rn))),
5388 def : Pat<(f64 (opnode (v2f64 VPR128:$Rn))),
5389 (INSTD VPR128:$Rn)>;
5392 // Patterns to match llvm.aarch64.* intrinsic for
5393 // Scalar Reduce Add, Max, Min, MaxNum, MinNum Pairwise (Floating Point)
5394 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfadd,
5395 FADDPvv_S_2S, FADDPvv_D_2D>;
5397 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmax,
5398 FMAXPvv_S_2S, FMAXPvv_D_2D>;
5400 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmin,
5401 FMINPvv_S_2S, FMINPvv_D_2D>;
5403 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfmaxnm,
5404 FMAXNMPvv_S_2S, FMAXNMPvv_D_2D>;
5406 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfminnm,
5407 FMINNMPvv_S_2S, FMINNMPvv_D_2D>;
5409 def : Pat<(f32 (int_aarch64_neon_vpfadd (v4f32 VPR128:$Rn))),
5410 (FADDPvv_S_2S (v2f32
5412 (v4f32 (FADDP_4S (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rn))),
5415 // Scalar by element Arithmetic
5417 class NeonI_ScalarXIndexedElemArith<string asmop, bits<4> opcode,
5418 string rmlane, bit u, bit szhi, bit szlo,
5419 RegisterClass ResFPR, RegisterClass OpFPR,
5420 RegisterOperand OpVPR, Operand OpImm>
5421 : NeonI_ScalarXIndexedElem<u, szhi, szlo, opcode,
5423 (ins OpFPR:$Rn, OpVPR:$MRm, OpImm:$Imm),
5424 asmop # "\t$Rd, $Rn, $MRm" # rmlane # "[$Imm]",
5431 class NeonI_ScalarXIndexedElemArith_Constraint_Impl<string asmop, bits<4> opcode,
5433 bit u, bit szhi, bit szlo,
5434 RegisterClass ResFPR,
5435 RegisterClass OpFPR,
5436 RegisterOperand OpVPR,
5438 : NeonI_ScalarXIndexedElem<u, szhi, szlo, opcode,
5440 (ins ResFPR:$src, OpFPR:$Rn, OpVPR:$MRm, OpImm:$Imm),
5441 asmop # "\t$Rd, $Rn, $MRm" # rmlane # "[$Imm]",
5444 let Constraints = "$src = $Rd";
5449 // Scalar Floating Point multiply (scalar, by element)
5450 def FMULssv_4S : NeonI_ScalarXIndexedElemArith<"fmul",
5451 0b1001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
5452 let Inst{11} = Imm{1}; // h
5453 let Inst{21} = Imm{0}; // l
5454 let Inst{20-16} = MRm;
5456 def FMULddv_2D : NeonI_ScalarXIndexedElemArith<"fmul",
5457 0b1001, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
5458 let Inst{11} = Imm{0}; // h
5459 let Inst{21} = 0b0; // l
5460 let Inst{20-16} = MRm;
5463 // Scalar Floating Point multiply extended (scalar, by element)
5464 def FMULXssv_4S : NeonI_ScalarXIndexedElemArith<"fmulx",
5465 0b1001, ".s", 0b1, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
5466 let Inst{11} = Imm{1}; // h
5467 let Inst{21} = Imm{0}; // l
5468 let Inst{20-16} = MRm;
5470 def FMULXddv_2D : NeonI_ScalarXIndexedElemArith<"fmulx",
5471 0b1001, ".d", 0b1, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
5472 let Inst{11} = Imm{0}; // h
5473 let Inst{21} = 0b0; // l
5474 let Inst{20-16} = MRm;
5477 multiclass Neon_ScalarXIndexedElem_MUL_MULX_Patterns<
5478 SDPatternOperator opnode,
5480 ValueType ResTy, RegisterClass FPRC, ValueType OpTy, Operand OpImm,
5481 ValueType OpNTy, ValueType ExTy, Operand OpNImm> {
5483 def : Pat<(ResTy (opnode (ResTy FPRC:$Rn),
5484 (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)))),
5485 (ResTy (INST (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
5487 def : Pat<(ResTy (opnode (ResTy FPRC:$Rn),
5488 (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)))),
5489 (ResTy (INST (ResTy FPRC:$Rn),
5490 (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
5494 def : Pat<(ResTy (opnode
5495 (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)),
5497 (ResTy (INST (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
5499 def : Pat<(ResTy (opnode
5500 (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)),
5502 (ResTy (INST (ResTy FPRC:$Rn),
5503 (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
5507 // Patterns for Scalar Floating Point multiply (scalar, by element)
5508 defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<fmul, FMULssv_4S,
5509 f32, FPR32, v4f32, neon_uimm2_bare, v2f32, v4f32, neon_uimm1_bare>;
5510 defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<fmul, FMULddv_2D,
5511 f64, FPR64, v2f64, neon_uimm1_bare, v1f64, v2f64, neon_uimm0_bare>;
5513 // Patterns for Scalar Floating Point multiply extended (scalar, by element)
5514 defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<int_aarch64_neon_vmulx,
5515 FMULXssv_4S, f32, FPR32, v4f32, neon_uimm2_bare,
5516 v2f32, v4f32, neon_uimm1_bare>;
5517 defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<int_aarch64_neon_vmulx,
5518 FMULXddv_2D, f64, FPR64, v2f64, neon_uimm1_bare,
5519 v1f64, v2f64, neon_uimm0_bare>;
5521 // Scalar Floating Point fused multiply-add (scalar, by element)
5522 def FMLAssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla",
5523 0b0001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
5524 let Inst{11} = Imm{1}; // h
5525 let Inst{21} = Imm{0}; // l
5526 let Inst{20-16} = MRm;
5528 def FMLAddv_2D : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla",
5529 0b0001, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
5530 let Inst{11} = Imm{0}; // h
5531 let Inst{21} = 0b0; // l
5532 let Inst{20-16} = MRm;
5535 // Scalar Floating Point fused multiply-subtract (scalar, by element)
5536 def FMLSssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmls",
5537 0b0101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
5538 let Inst{11} = Imm{1}; // h
5539 let Inst{21} = Imm{0}; // l
5540 let Inst{20-16} = MRm;
5542 def FMLSddv_2D : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmls",
5543 0b0101, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
5544 let Inst{11} = Imm{0}; // h
5545 let Inst{21} = 0b0; // l
5546 let Inst{20-16} = MRm;
5548 // We are allowed to match the fma instruction regardless of compile options.
5549 multiclass Neon_ScalarXIndexedElem_FMA_Patterns<
5550 Instruction FMLAI, Instruction FMLSI,
5551 ValueType ResTy, RegisterClass FPRC, ValueType OpTy, Operand OpImm,
5552 ValueType OpNTy, ValueType ExTy, Operand OpNImm> {
5554 def : Pat<(ResTy (fma (ResTy FPRC:$Rn),
5555 (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)),
5557 (ResTy (FMLAI (ResTy FPRC:$Ra),
5558 (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
5560 def : Pat<(ResTy (fma (ResTy FPRC:$Rn),
5561 (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)),
5563 (ResTy (FMLAI (ResTy FPRC:$Ra),
5565 (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
5568 // swapped fmla operands
5569 def : Pat<(ResTy (fma
5570 (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)),
5573 (ResTy (FMLAI (ResTy FPRC:$Ra),
5574 (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
5576 def : Pat<(ResTy (fma
5577 (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)),
5580 (ResTy (FMLAI (ResTy FPRC:$Ra),
5582 (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
5586 def : Pat<(ResTy (fma (ResTy FPRC:$Rn),
5587 (fneg (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm))),
5589 (ResTy (FMLSI (ResTy FPRC:$Ra),
5590 (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
5592 def : Pat<(ResTy (fma (ResTy FPRC:$Rn),
5593 (fneg (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm))),
5595 (ResTy (FMLSI (ResTy FPRC:$Ra),
5597 (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
5600 // swapped fmls operands
5601 def : Pat<(ResTy (fma
5602 (fneg (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm))),
5605 (ResTy (FMLSI (ResTy FPRC:$Ra),
5606 (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
5608 def : Pat<(ResTy (fma
5609 (fneg (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm))),
5612 (ResTy (FMLSI (ResTy FPRC:$Ra),
5614 (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
5618 // Scalar Floating Point fused multiply-add and
5619 // multiply-subtract (scalar, by element)
5620 defm : Neon_ScalarXIndexedElem_FMA_Patterns<FMLAssv_4S, FMLSssv_4S,
5621 f32, FPR32, v4f32, neon_uimm2_bare, v2f32, v4f32, neon_uimm1_bare>;
// NOTE: the FMLAddv_2D/FMLSddv_2D instantiation must appear exactly once; a
// second defm with identical template arguments previously followed and only
// produced redundant (duplicate) selection patterns, so it was removed.
5622 defm : Neon_ScalarXIndexedElem_FMA_Patterns<FMLAddv_2D, FMLSddv_2D,
5623 f64, FPR64, v2f64, neon_uimm1_bare, v1f64, v2f64, neon_uimm0_bare>;
5627 // Scalar Signed saturating doubling multiply long (scalar, by element)
5628 def SQDMULLshv_4H : NeonI_ScalarXIndexedElemArith<"sqdmull",
5629 0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
5630 let Inst{11} = 0b0; // h
5631 let Inst{21} = Imm{1}; // l
5632 let Inst{20} = Imm{0}; // m
5633 let Inst{19-16} = MRm{3-0};
5635 def SQDMULLshv_8H : NeonI_ScalarXIndexedElemArith<"sqdmull",
5636 0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
5637 let Inst{11} = Imm{2}; // h
5638 let Inst{21} = Imm{1}; // l
5639 let Inst{20} = Imm{0}; // m
5640 let Inst{19-16} = MRm{3-0};
5642 def SQDMULLdsv_2S : NeonI_ScalarXIndexedElemArith<"sqdmull",
5643 0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
5644 let Inst{11} = 0b0; // h
5645 let Inst{21} = Imm{0}; // l
5646 let Inst{20-16} = MRm;
5648 def SQDMULLdsv_4S : NeonI_ScalarXIndexedElemArith<"sqdmull",
5649 0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
5650 let Inst{11} = Imm{1}; // h
5651 let Inst{21} = Imm{0}; // l
5652 let Inst{20-16} = MRm;
5655 multiclass Neon_ScalarXIndexedElem_MUL_Patterns<
5656 SDPatternOperator opnode,
5658 ValueType ResTy, RegisterClass FPRC,
5659 ValueType OpVTy, ValueType OpTy,
5660 ValueType VecOpTy, ValueType ExTy, RegisterOperand VPRC, Operand OpImm> {
5662 def : Pat<(ResTy (opnode (OpVTy FPRC:$Rn),
5663 (OpVTy (scalar_to_vector
5664 (ExTy (vector_extract (VecOpTy VPRC:$MRm), OpImm:$Imm)))))),
5665 (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>;
5668 def : Pat<(ResTy (opnode
5669 (OpVTy (scalar_to_vector
5670 (ExTy (vector_extract (VecOpTy VPRC:$MRm), OpImm:$Imm)))),
5672 (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>;
5676 // Patterns for Scalar Signed saturating doubling
5677 // multiply long (scalar, by element)
// NOTE(review): the _2S/_4S instructions above are declared with VPR64/VPR128
// but these patterns pass VPR64Lo/VPR128Lo (register subsets) — confirm this
// restriction is intentional for the .s-lane variants.
5678 defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull,
5679 SQDMULLshv_4H, v1i32, FPR16, v1i16, i16, v4i16,
5680 i32, VPR64Lo, neon_uimm2_bare>;
5681 defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull,
5682 SQDMULLshv_8H, v1i32, FPR16, v1i16, i16, v8i16,
5683 i32, VPR128Lo, neon_uimm3_bare>;
5684 defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull,
5685 SQDMULLdsv_2S, v1i64, FPR32, v1i32, i32, v2i32,
5686 i32, VPR64Lo, neon_uimm1_bare>;
5687 defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull,
5688 SQDMULLdsv_4S, v1i64, FPR32, v1i32, i32, v4i32,
5689 i32, VPR128Lo, neon_uimm2_bare>;
5691 // Scalar Signed saturating doubling multiply-add long (scalar, by element)
5692 def SQDMLALshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
5693 0b0011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
5694 let Inst{11} = 0b0; // h
5695 let Inst{21} = Imm{1}; // l
5696 let Inst{20} = Imm{0}; // m
5697 let Inst{19-16} = MRm{3-0};
5699 def SQDMLALshv_8H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
5700 0b0011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
5701 let Inst{11} = Imm{2}; // h
5702 let Inst{21} = Imm{1}; // l
5703 let Inst{20} = Imm{0}; // m
5704 let Inst{19-16} = MRm{3-0};
5706 def SQDMLALdsv_2S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
5707 0b0011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
5708 let Inst{11} = 0b0; // h
5709 let Inst{21} = Imm{0}; // l
5710 let Inst{20-16} = MRm;
5712 def SQDMLALdsv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
5713 0b0011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
5714 let Inst{11} = Imm{1}; // h
5715 let Inst{21} = Imm{0}; // l
5716 let Inst{20-16} = MRm;
5719 // Scalar Signed saturating doubling
5720 // multiply-subtract long (scalar, by element)
5721 def SQDMLSLshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
5722 0b0111, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
5723 let Inst{11} = 0b0; // h
5724 let Inst{21} = Imm{1}; // l
5725 let Inst{20} = Imm{0}; // m
5726 let Inst{19-16} = MRm{3-0};
5728 def SQDMLSLshv_8H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
5729 0b0111, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
5730 let Inst{11} = Imm{2}; // h
5731 let Inst{21} = Imm{1}; // l
5732 let Inst{20} = Imm{0}; // m
5733 let Inst{19-16} = MRm{3-0};
5735 def SQDMLSLdsv_2S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
5736 0b0111, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
5737 let Inst{11} = 0b0; // h
5738 let Inst{21} = Imm{0}; // l
5739 let Inst{20-16} = MRm;
5741 def SQDMLSLdsv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
5742 0b0111, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
5743 let Inst{11} = Imm{1}; // h
5744 let Inst{21} = Imm{0}; // l
5745 let Inst{20-16} = MRm;
5748 multiclass Neon_ScalarXIndexedElem_MLAL_Patterns<
5749 SDPatternOperator opnode,
5750 SDPatternOperator coreopnode,
5752 ValueType ResTy, RegisterClass ResFPRC, RegisterClass FPRC,
5754 ValueType OpVTy, ValueType ExTy, RegisterOperand VPRC, Operand OpImm> {
5756 def : Pat<(ResTy (opnode
5757 (ResTy ResFPRC:$Ra),
5758 (ResTy (coreopnode (OpTy FPRC:$Rn),
5759 (OpTy (scalar_to_vector
5760 (ExTy (vector_extract (OpVTy VPRC:$MRm), OpImm:$Imm)))))))),
5761 (ResTy (INST (ResTy ResFPRC:$Ra),
5762 (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>;
5765 def : Pat<(ResTy (opnode
5766 (ResTy ResFPRC:$Ra),
5768 (OpTy (scalar_to_vector
5769 (ExTy (vector_extract (OpVTy VPRC:$MRm), OpImm:$Imm)))),
5770 (OpTy FPRC:$Rn))))),
5771 (ResTy (INST (ResTy ResFPRC:$Ra),
5772 (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>;
5775 // Patterns for Scalar Signed saturating
5776 // doubling multiply-add long (scalar, by element)
5777 defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
5778 int_arm_neon_vqdmull, SQDMLALshv_4H, v1i32, FPR32, FPR16, v1i16, v4i16,
5779 i32, VPR64Lo, neon_uimm2_bare>;
5780 defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
5781 int_arm_neon_vqdmull, SQDMLALshv_8H, v1i32, FPR32, FPR16, v1i16, v8i16,
5782 i32, VPR128Lo, neon_uimm3_bare>;
5783 defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
5784 int_arm_neon_vqdmull, SQDMLALdsv_2S, v1i64, FPR64, FPR32, v1i32, v2i32,
5785 i32, VPR64Lo, neon_uimm1_bare>;
5786 defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
5787 int_arm_neon_vqdmull, SQDMLALdsv_4S, v1i64, FPR64, FPR32, v1i32, v4i32,
5788 i32, VPR128Lo, neon_uimm2_bare>;
5790 // Patterns for Scalar Signed saturating
5791 // doubling multiply-sub long (scalar, by element)
5792 defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
5793 int_arm_neon_vqdmull, SQDMLSLshv_4H, v1i32, FPR32, FPR16, v1i16, v4i16,
5794 i32, VPR64Lo, neon_uimm2_bare>;
5795 defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
5796 int_arm_neon_vqdmull, SQDMLSLshv_8H, v1i32, FPR32, FPR16, v1i16, v8i16,
5797 i32, VPR128Lo, neon_uimm3_bare>;
5798 defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
5799 int_arm_neon_vqdmull, SQDMLSLdsv_2S, v1i64, FPR64, FPR32, v1i32, v2i32,
5800 i32, VPR64Lo, neon_uimm1_bare>;
5801 defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
5802 int_arm_neon_vqdmull, SQDMLSLdsv_4S, v1i64, FPR64, FPR32, v1i32, v4i32,
5803 i32, VPR128Lo, neon_uimm2_bare>;
5805 // Scalar Signed saturating doubling multiply returning
5806 // high half (scalar, by element)
5807 def SQDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqdmulh",
5808 0b1100, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR64Lo, neon_uimm2_bare> {
5809 let Inst{11} = 0b0; // h
5810 let Inst{21} = Imm{1}; // l
5811 let Inst{20} = Imm{0}; // m
5812 let Inst{19-16} = MRm{3-0};
5814 def SQDMULHhhv_8H : NeonI_ScalarXIndexedElemArith<"sqdmulh",
5815 0b1100, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR128Lo, neon_uimm3_bare> {
5816 let Inst{11} = Imm{2}; // h
5817 let Inst{21} = Imm{1}; // l
5818 let Inst{20} = Imm{0}; // m
5819 let Inst{19-16} = MRm{3-0};
5821 def SQDMULHssv_2S : NeonI_ScalarXIndexedElemArith<"sqdmulh",
5822 0b1100, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR64, neon_uimm1_bare> {
5823 let Inst{11} = 0b0; // h
5824 let Inst{21} = Imm{0}; // l
5825 let Inst{20-16} = MRm;
5827 def SQDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqdmulh",
5828 0b1100, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
5829 let Inst{11} = Imm{1}; // h
5830 let Inst{21} = Imm{0}; // l
5831 let Inst{20-16} = MRm;
5834 // Patterns for Scalar Signed saturating doubling multiply returning
5835 // high half (scalar, by element)
5836 defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
5837 SQDMULHhhv_4H, v1i16, FPR16, v1i16, i16, v4i16,
5838 i32, VPR64Lo, neon_uimm2_bare>;
5839 defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
5840 SQDMULHhhv_8H, v1i16, FPR16, v1i16, i16, v8i16,
5841 i32, VPR128Lo, neon_uimm3_bare>;
5842 defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
5843 SQDMULHssv_2S, v1i32, FPR32, v1i32, i32, v2i32,
5844 i32, VPR64Lo, neon_uimm1_bare>;
5845 defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
5846 SQDMULHssv_4S, v1i32, FPR32, v1i32, i32, v4i32,
5847 i32, VPR128Lo, neon_uimm2_bare>;
5849 // Scalar Signed saturating rounding doubling multiply
5850 // returning high half (scalar, by element)
5851 def SQRDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
5852 0b1101, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR64Lo, neon_uimm2_bare> {
5853 let Inst{11} = 0b0; // h
5854 let Inst{21} = Imm{1}; // l
5855 let Inst{20} = Imm{0}; // m
5856 let Inst{19-16} = MRm{3-0};
5858 def SQRDMULHhhv_8H : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
5859 0b1101, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR128Lo, neon_uimm3_bare> {
5860 let Inst{11} = Imm{2}; // h
5861 let Inst{21} = Imm{1}; // l
5862 let Inst{20} = Imm{0}; // m
5863 let Inst{19-16} = MRm{3-0};
5865 def SQRDMULHssv_2S : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
5866 0b1101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR64, neon_uimm1_bare> {
5867 let Inst{11} = 0b0; // h
5868 let Inst{21} = Imm{0}; // l
5869 let Inst{20-16} = MRm;
5871 def SQRDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
5872 0b1101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
5873 let Inst{11} = Imm{1}; // h
5874 let Inst{21} = Imm{0}; // l
5875 let Inst{20-16} = MRm;
// Patterns for Scalar Signed saturating rounding doubling multiply
// returning high half (scalar, by element)
5878 defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
5879 SQRDMULHhhv_4H, v1i16, FPR16, v1i16, i16, v4i16, i32,
5880 VPR64Lo, neon_uimm2_bare>;
5881 defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
5882 SQRDMULHhhv_8H, v1i16, FPR16, v1i16, i16, v8i16, i32,
5883 VPR128Lo, neon_uimm3_bare>;
5884 defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
5885 SQRDMULHssv_2S, v1i32, FPR32, v1i32, i32, v2i32, i32,
5886 VPR64Lo, neon_uimm1_bare>;
5887 defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
5888 SQRDMULHssv_4S, v1i32, FPR32, v1i32, i32, v4i32, i32,
5889 VPR128Lo, neon_uimm2_bare>;
5891 // Scalar general arithmetic operation
5892 class Neon_Scalar_GeneralMath2D_pattern<SDPatternOperator opnode,
5894 : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;
5896 class Neon_Scalar_GeneralMath3D_pattern<SDPatternOperator opnode,
5898 : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
5899 (INST FPR64:$Rn, FPR64:$Rm)>;
5901 class Neon_Scalar_GeneralMath4D_pattern<SDPatternOperator opnode,
5903 : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm),
5904 (v1f64 FPR64:$Ra))),
5905 (INST FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
// v1f64 scalar arithmetic is selected to the ordinary scalar FP
// instructions (2-, 3- and 4-operand forms).
5907 def : Neon_Scalar_GeneralMath3D_pattern<fadd, FADDddd>;
5908 def : Neon_Scalar_GeneralMath3D_pattern<fmul, FMULddd>;
5909 def : Neon_Scalar_GeneralMath3D_pattern<fsub, FSUBddd>;
5910 def : Neon_Scalar_GeneralMath3D_pattern<fdiv, FDIVddd>;
5911 def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vabds, FABDddd>;
5912 def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vmaxs, FMAXddd>;
5913 def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vmins, FMINddd>;
5914 def : Neon_Scalar_GeneralMath3D_pattern<int_aarch64_neon_vmaxnm, FMAXNMddd>;
5915 def : Neon_Scalar_GeneralMath3D_pattern<int_aarch64_neon_vminnm, FMINNMddd>;
5917 def : Neon_Scalar_GeneralMath2D_pattern<fabs, FABSdd>;
5918 def : Neon_Scalar_GeneralMath2D_pattern<fneg, FNEGdd>;
5920 def : Neon_Scalar_GeneralMath4D_pattern<fma, FMADDdddd>;
5921 def : Neon_Scalar_GeneralMath4D_pattern<fmsub, FMSUBdddd>;
5923 // Scalar Copy - DUP element to scalar
5924 class NeonI_Scalar_DUP<string asmop, string asmlane,
5925 RegisterClass ResRC, RegisterOperand VPRC,
5927 : NeonI_ScalarCopy<(outs ResRC:$Rd), (ins VPRC:$Rn, OpImm:$Imm),
5928 asmop # "\t$Rd, $Rn." # asmlane # "[$Imm]",
5934 def DUPbv_B : NeonI_Scalar_DUP<"dup", "b", FPR8, VPR128, neon_uimm4_bare> {
5935 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
5937 def DUPhv_H : NeonI_Scalar_DUP<"dup", "h", FPR16, VPR128, neon_uimm3_bare> {
5938 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
5940 def DUPsv_S : NeonI_Scalar_DUP<"dup", "s", FPR32, VPR128, neon_uimm2_bare> {
5941 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
5943 def DUPdv_D : NeonI_Scalar_DUP<"dup", "d", FPR64, VPR128, neon_uimm1_bare> {
5944 let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
5947 multiclass NeonI_Scalar_DUP_Elt_pattern<Instruction DUPI, ValueType ResTy,
5948 ValueType OpTy, Operand OpImm,
5949 ValueType OpNTy, ValueType ExTy, Operand OpNImm> {
5950 def : Pat<(ResTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)),
5951 (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>;
5953 def : Pat<(ResTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)),
5955 (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
5959 // Patterns for vector extract of FP data using scalar DUP instructions
5960 defm : NeonI_Scalar_DUP_Elt_pattern<DUPsv_S, f32,
5961 v4f32, neon_uimm2_bare, v2f32, v4f32, neon_uimm1_bare>;
5962 defm : NeonI_Scalar_DUP_Elt_pattern<DUPdv_D, f64,
5963 v2f64, neon_uimm1_bare, v1f64, v2f64, neon_uimm0_bare>;
5965 multiclass NeonI_Scalar_DUP_Ext_Vec_pattern<Instruction DUPI,
5966 ValueType ResTy, ValueType OpTy,Operand OpLImm,
5967 ValueType NOpTy, ValueType ExTy, Operand OpNImm> {
5969 def : Pat<(ResTy (extract_subvector (OpTy VPR128:$Rn), OpLImm:$Imm)),
5970 (ResTy (DUPI VPR128:$Rn, OpLImm:$Imm))>;
5972 def : Pat<(ResTy (extract_subvector (NOpTy VPR64:$Rn), OpNImm:$Imm)),
5974 (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
5978 // Patterns for extract subvectors of v1ix data using scalar DUP instructions.
5979 defm : NeonI_Scalar_DUP_Ext_Vec_pattern<DUPbv_B, v1i8, v16i8, neon_uimm4_bare,
5980 v8i8, v16i8, neon_uimm3_bare>;
5981 defm : NeonI_Scalar_DUP_Ext_Vec_pattern<DUPhv_H, v1i16, v8i16, neon_uimm3_bare,
5982 v4i16, v8i16, neon_uimm2_bare>;
5983 defm : NeonI_Scalar_DUP_Ext_Vec_pattern<DUPsv_S, v1i32, v4i32, neon_uimm2_bare,
5984 v2i32, v4i32, neon_uimm1_bare>;
5986 multiclass NeonI_Scalar_DUP_Copy_pattern1<Instruction DUPI, ValueType ResTy,
5987 ValueType OpTy, ValueType ElemTy,
5988 Operand OpImm, ValueType OpNTy,
5989 ValueType ExTy, Operand OpNImm> {
5991 def : Pat<(ResTy (vector_insert (ResTy undef),
5992 (ElemTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)),
5993 (neon_uimm0_bare:$Imm))),
5994 (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>;
5996 def : Pat<(ResTy (vector_insert (ResTy undef),
5997 (ElemTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)),
6000 (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
6004 multiclass NeonI_Scalar_DUP_Copy_pattern2<Instruction DUPI, ValueType ResTy,
6005 ValueType OpTy, ValueType ElemTy,
6006 Operand OpImm, ValueType OpNTy,
6007 ValueType ExTy, Operand OpNImm> {
6009 def : Pat<(ResTy (scalar_to_vector
6010 (ElemTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)))),
6011 (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>;
6013 def : Pat<(ResTy (scalar_to_vector
6014 (ElemTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)))),
6016 (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
6020 // Patterns for vector copy to v1ix and v1fx vectors using scalar DUP
// pattern1 matches vector_insert of an extracted lane into an undef vector;
// pattern2 matches scalar_to_vector of an extracted lane (see the two
// multiclasses above). One instantiation per element size (d/s/h/b); each
// pairs the 128-bit source type with its 64-bit "natural" counterpart and
// the matching lane-index immediates.
6022 defm : NeonI_Scalar_DUP_Copy_pattern1<DUPdv_D,
6023 v1i64, v2i64, i64, neon_uimm1_bare,
6024 v1i64, v2i64, neon_uimm0_bare>;
6025 defm : NeonI_Scalar_DUP_Copy_pattern1<DUPsv_S,
6026 v1i32, v4i32, i32, neon_uimm2_bare,
6027 v2i32, v4i32, neon_uimm1_bare>;
6028 defm : NeonI_Scalar_DUP_Copy_pattern1<DUPhv_H,
6029 v1i16, v8i16, i32, neon_uimm3_bare,
6030 v4i16, v8i16, neon_uimm2_bare>;
6031 defm : NeonI_Scalar_DUP_Copy_pattern1<DUPbv_B,
6032 v1i8, v16i8, i32, neon_uimm4_bare,
6033 v8i8, v16i8, neon_uimm3_bare>;
6034 defm : NeonI_Scalar_DUP_Copy_pattern2<DUPdv_D,
6035 v1i64, v2i64, i64, neon_uimm1_bare,
6036 v1i64, v2i64, neon_uimm0_bare>;
6037 defm : NeonI_Scalar_DUP_Copy_pattern2<DUPsv_S,
6038 v1i32, v4i32, i32, neon_uimm2_bare,
6039 v2i32, v4i32, neon_uimm1_bare>;
6040 defm : NeonI_Scalar_DUP_Copy_pattern2<DUPhv_H,
6041 v1i16, v8i16, i32, neon_uimm3_bare,
6042 v4i16, v8i16, neon_uimm2_bare>;
6043 defm : NeonI_Scalar_DUP_Copy_pattern2<DUPbv_B,
6044 v1i8, v16i8, i32, neon_uimm4_bare,
6045 v8i8, v16i8, neon_uimm3_bare>;
6047 multiclass NeonI_Scalar_DUP_alias<string asmop, string asmlane,
6048 Instruction DUPI, Operand OpImm,
6049 RegisterClass ResRC> {
6050 def : NeonInstAlias<!strconcat(asmop, "$Rd, $Rn" # asmlane # "[$Imm]"),
6051 (DUPI ResRC:$Rd, VPR128:$Rn, OpImm:$Imm), 0b0>;
6054 // Aliases for Scalar copy - DUP element (scalar)
6055 // FIXME: This is actually the preferred syntax but TableGen can't deal with
6056 // custom printing of aliases.
// Parse-only "mov" spellings (the alias multiclass passes emit priority 0,
// so the printer keeps "dup"), one per element size b/h/s/d.
6057 defm : NeonI_Scalar_DUP_alias<"mov", ".b", DUPbv_B, neon_uimm4_bare, FPR8>;
6058 defm : NeonI_Scalar_DUP_alias<"mov", ".h", DUPhv_H, neon_uimm3_bare, FPR16>;
6059 defm : NeonI_Scalar_DUP_alias<"mov", ".s", DUPsv_S, neon_uimm2_bare, FPR32>;
6060 defm : NeonI_Scalar_DUP_alias<"mov", ".d", DUPdv_D, neon_uimm1_bare, FPR64>;
6062 multiclass NeonI_SDUP<PatFrag GetLow, PatFrag GetHigh, ValueType ResTy,
6064 def : Pat<(ResTy (GetLow VPR128:$Rn)),
6065 (ResTy (EXTRACT_SUBREG (OpTy VPR128:$Rn), sub_64))>;
6066 def : Pat<(ResTy (GetHigh VPR128:$Rn)),
6067 (ResTy (DUPdv_D (OpTy VPR128:$Rn), 1))>;
// Select low/high-half extraction of a 128-bit vector: the low half becomes
// a sub_64 subregister copy and the high half a DUPdv_D of lane 1 (see the
// NeonI_SDUP multiclass above). One instantiation per 64-bit result type.
6070 defm : NeonI_SDUP<Neon_Low16B, Neon_High16B, v8i8, v16i8>;
6071 defm : NeonI_SDUP<Neon_Low8H, Neon_High8H, v4i16, v8i16>;
6072 defm : NeonI_SDUP<Neon_Low4S, Neon_High4S, v2i32, v4i32>;
6073 defm : NeonI_SDUP<Neon_Low2D, Neon_High2D, v1i64, v2i64>;
6074 defm : NeonI_SDUP<Neon_Low4float, Neon_High4float, v2f32, v4f32>;
6075 defm : NeonI_SDUP<Neon_Low2double, Neon_High2double, v1f64, v2f64>;
6077 //===----------------------------------------------------------------------===//
6078 // Non-Instruction Patterns
6079 //===----------------------------------------------------------------------===//
6081 // 64-bit vector bitcasts...
// Bitconverts between 64-bit vector types are register no-ops: the VPR64
// operand is reused unchanged as the destination type; no instruction is
// emitted. Patterns are grouped by source type (v8i8, v4i16, v2i32, v2f32,
// v1i64), each mapping to every other 64-bit vector type.
6083 def : Pat<(v1i64 (bitconvert (v8i8 VPR64:$src))), (v1i64 VPR64:$src)>;
6084 def : Pat<(v2f32 (bitconvert (v8i8 VPR64:$src))), (v2f32 VPR64:$src)>;
6085 def : Pat<(v2i32 (bitconvert (v8i8 VPR64:$src))), (v2i32 VPR64:$src)>;
6086 def : Pat<(v4i16 (bitconvert (v8i8 VPR64:$src))), (v4i16 VPR64:$src)>;
6088 def : Pat<(v1i64 (bitconvert (v4i16 VPR64:$src))), (v1i64 VPR64:$src)>;
6089 def : Pat<(v2i32 (bitconvert (v4i16 VPR64:$src))), (v2i32 VPR64:$src)>;
6090 def : Pat<(v2f32 (bitconvert (v4i16 VPR64:$src))), (v2f32 VPR64:$src)>;
6091 def : Pat<(v8i8 (bitconvert (v4i16 VPR64:$src))), (v8i8 VPR64:$src)>;
6093 def : Pat<(v1i64 (bitconvert (v2i32 VPR64:$src))), (v1i64 VPR64:$src)>;
6094 def : Pat<(v2f32 (bitconvert (v2i32 VPR64:$src))), (v2f32 VPR64:$src)>;
6095 def : Pat<(v4i16 (bitconvert (v2i32 VPR64:$src))), (v4i16 VPR64:$src)>;
6096 def : Pat<(v8i8 (bitconvert (v2i32 VPR64:$src))), (v8i8 VPR64:$src)>;
6098 def : Pat<(v1i64 (bitconvert (v2f32 VPR64:$src))), (v1i64 VPR64:$src)>;
6099 def : Pat<(v2i32 (bitconvert (v2f32 VPR64:$src))), (v2i32 VPR64:$src)>;
6100 def : Pat<(v4i16 (bitconvert (v2f32 VPR64:$src))), (v4i16 VPR64:$src)>;
6101 def : Pat<(v8i8 (bitconvert (v2f32 VPR64:$src))), (v8i8 VPR64:$src)>;
6103 def : Pat<(v2f32 (bitconvert (v1i64 VPR64:$src))), (v2f32 VPR64:$src)>;
6104 def : Pat<(v2i32 (bitconvert (v1i64 VPR64:$src))), (v2i32 VPR64:$src)>;
6105 def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>;
6106 def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>;
6108 // ..and 128-bit vector bitcasts...
// As above, but for 128-bit types: every VPR128 <-> VPR128 bitconvert is a
// register no-op. Grouped by source type (v16i8, v8i16, v4i32, v4f32,
// v2i64, v2f64).
6110 def : Pat<(v2f64 (bitconvert (v16i8 VPR128:$src))), (v2f64 VPR128:$src)>;
6111 def : Pat<(v2i64 (bitconvert (v16i8 VPR128:$src))), (v2i64 VPR128:$src)>;
6112 def : Pat<(v4f32 (bitconvert (v16i8 VPR128:$src))), (v4f32 VPR128:$src)>;
6113 def : Pat<(v4i32 (bitconvert (v16i8 VPR128:$src))), (v4i32 VPR128:$src)>;
6114 def : Pat<(v8i16 (bitconvert (v16i8 VPR128:$src))), (v8i16 VPR128:$src)>;
6116 def : Pat<(v2f64 (bitconvert (v8i16 VPR128:$src))), (v2f64 VPR128:$src)>;
6117 def : Pat<(v2i64 (bitconvert (v8i16 VPR128:$src))), (v2i64 VPR128:$src)>;
6118 def : Pat<(v4i32 (bitconvert (v8i16 VPR128:$src))), (v4i32 VPR128:$src)>;
6119 def : Pat<(v4f32 (bitconvert (v8i16 VPR128:$src))), (v4f32 VPR128:$src)>;
6120 def : Pat<(v16i8 (bitconvert (v8i16 VPR128:$src))), (v16i8 VPR128:$src)>;
6122 def : Pat<(v2f64 (bitconvert (v4i32 VPR128:$src))), (v2f64 VPR128:$src)>;
6123 def : Pat<(v2i64 (bitconvert (v4i32 VPR128:$src))), (v2i64 VPR128:$src)>;
6124 def : Pat<(v4f32 (bitconvert (v4i32 VPR128:$src))), (v4f32 VPR128:$src)>;
6125 def : Pat<(v8i16 (bitconvert (v4i32 VPR128:$src))), (v8i16 VPR128:$src)>;
6126 def : Pat<(v16i8 (bitconvert (v4i32 VPR128:$src))), (v16i8 VPR128:$src)>;
6128 def : Pat<(v2f64 (bitconvert (v4f32 VPR128:$src))), (v2f64 VPR128:$src)>;
6129 def : Pat<(v2i64 (bitconvert (v4f32 VPR128:$src))), (v2i64 VPR128:$src)>;
6130 def : Pat<(v4i32 (bitconvert (v4f32 VPR128:$src))), (v4i32 VPR128:$src)>;
6131 def : Pat<(v8i16 (bitconvert (v4f32 VPR128:$src))), (v8i16 VPR128:$src)>;
6132 def : Pat<(v16i8 (bitconvert (v4f32 VPR128:$src))), (v16i8 VPR128:$src)>;
6134 def : Pat<(v2f64 (bitconvert (v2i64 VPR128:$src))), (v2f64 VPR128:$src)>;
6135 def : Pat<(v4f32 (bitconvert (v2i64 VPR128:$src))), (v4f32 VPR128:$src)>;
6136 def : Pat<(v4i32 (bitconvert (v2i64 VPR128:$src))), (v4i32 VPR128:$src)>;
6137 def : Pat<(v8i16 (bitconvert (v2i64 VPR128:$src))), (v8i16 VPR128:$src)>;
6138 def : Pat<(v16i8 (bitconvert (v2i64 VPR128:$src))), (v16i8 VPR128:$src)>;
6140 def : Pat<(v2i64 (bitconvert (v2f64 VPR128:$src))), (v2i64 VPR128:$src)>;
6141 def : Pat<(v4f32 (bitconvert (v2f64 VPR128:$src))), (v4f32 VPR128:$src)>;
6142 def : Pat<(v4i32 (bitconvert (v2f64 VPR128:$src))), (v4i32 VPR128:$src)>;
6143 def : Pat<(v8i16 (bitconvert (v2f64 VPR128:$src))), (v8i16 VPR128:$src)>;
6144 def : Pat<(v16i8 (bitconvert (v2f64 VPR128:$src))), (v16i8 VPR128:$src)>;
6146 // ...and scalar bitcasts...
// Bitconverts between a scalar FP type and the one-element vector of the
// same width stay in the same FP register: no instruction emitted.
6147 def : Pat<(f16 (bitconvert (v1i16 FPR16:$src))), (f16 FPR16:$src)>;
6148 def : Pat<(f32 (bitconvert (v1i32 FPR32:$src))), (f32 FPR32:$src)>;
6149 def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>;
6150 def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>;
// Casts from a 64-bit vector to an i64 cross the FPR -> GPR register bank,
// so they lower to an FMOVxd (FPR64 -> GPR64 move).
6152 def : Pat<(i64 (bitconvert (v1i64 FPR64:$src))), (FMOVxd $src)>;
6153 def : Pat<(i64 (bitconvert (v1f64 FPR64:$src))), (FMOVxd $src)>;
6154 def : Pat<(i64 (bitconvert (v2i32 FPR64:$src))), (FMOVxd $src)>;
6155 def : Pat<(i64 (bitconvert (v2f32 FPR64:$src))), (FMOVxd $src)>;
6156 def : Pat<(i64 (bitconvert (v4i16 FPR64:$src))), (FMOVxd $src)>;
6157 def : Pat<(i64 (bitconvert (v8i8 FPR64:$src))), (FMOVxd $src)>;
// FPR32 -> GPR32 move for the 32-bit case.
6159 def : Pat<(i32 (bitconvert (v1i32 FPR32:$src))), (FMOVws $src)>;
// NOTE(review): the next three patterns exactly duplicate the v1i64-source
// patterns at 6104-6106 above; they look redundant and could be removed.
6161 def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>;
6162 def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>;
6163 def : Pat<(v2i32 (bitconvert (v1i64 VPR64:$src))), (v2i32 VPR64:$src)>;
// 64-bit vector -> f64: same register bank, register no-op.
6165 def : Pat<(f64 (bitconvert (v8i8 VPR64:$src))), (f64 VPR64:$src)>;
6166 def : Pat<(f64 (bitconvert (v4i16 VPR64:$src))), (f64 VPR64:$src)>;
6167 def : Pat<(f64 (bitconvert (v2i32 VPR64:$src))), (f64 VPR64:$src)>;
6168 def : Pat<(f64 (bitconvert (v2f32 VPR64:$src))), (f64 VPR64:$src)>;
6169 def : Pat<(f64 (bitconvert (v1i64 VPR64:$src))), (f64 VPR64:$src)>;
// 128-bit vector -> f128: same register bank, register no-op.
6171 def : Pat<(f128 (bitconvert (v16i8 VPR128:$src))), (f128 VPR128:$src)>;
6172 def : Pat<(f128 (bitconvert (v8i16 VPR128:$src))), (f128 VPR128:$src)>;
6173 def : Pat<(f128 (bitconvert (v4i32 VPR128:$src))), (f128 VPR128:$src)>;
6174 def : Pat<(f128 (bitconvert (v2i64 VPR128:$src))), (f128 VPR128:$src)>;
6175 def : Pat<(f128 (bitconvert (v4f32 VPR128:$src))), (f128 VPR128:$src)>;
6176 def : Pat<(f128 (bitconvert (v2f64 VPR128:$src))), (f128 VPR128:$src)>;
// Scalar FP -> one-element vector: same register, no-op.
6178 def : Pat<(v1i16 (bitconvert (f16 FPR16:$src))), (v1i16 FPR16:$src)>;
6179 def : Pat<(v1i32 (bitconvert (f32 FPR32:$src))), (v1i32 FPR32:$src)>;
6180 def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
6181 def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>;
// GPR -> 64-bit vector crosses the bank via FMOVdx (GPR64 -> FPR64).
6183 def : Pat<(v1i64 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
6184 def : Pat<(v1f64 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
6185 def : Pat<(v2i32 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
6186 def : Pat<(v2f32 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
6187 def : Pat<(v4i16 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
6188 def : Pat<(v8i8 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
// GPR32 -> FPR32 move for the 32-bit case.
6190 def : Pat<(v1i32 (bitconvert (i32 GPR32:$src))), (FMOVsw $src)>;
// f64 -> 64-bit vector and f128 -> 128-bit vector: register no-ops.
6192 def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
6193 def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
6194 def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
6195 def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
6196 def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
6198 def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>;
6199 def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), (v8i16 FPR128:$src)>;
6200 def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>;
6201 def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>;
6202 def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>;
6203 def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>;
6205 // Scalar Three Same
6207 def neon_uimm3 : Operand<i64>,
6208 ImmLeaf<i64, [{return Imm < 8;}]> {
6209 let ParserMatchClass = uimm3_asmoperand;
6210 let PrintMethod = "printUImmHexOperand";
6213 def neon_uimm4 : Operand<i64>,
6214 ImmLeaf<i64, [{return Imm < 16;}]> {
6215 let ParserMatchClass = uimm4_asmoperand;
6216 let PrintMethod = "printUImmHexOperand";
6220 class NeonI_Extract<bit q, bits<2> op2, string asmop,
6221 string OpS, RegisterOperand OpVPR, Operand OpImm>
6222 : NeonI_BitExtract<q, op2, (outs OpVPR:$Rd),
6223 (ins OpVPR:$Rn, OpVPR:$Rm, OpImm:$Index),
6224 asmop # "\t$Rd." # OpS # ", $Rn." # OpS #
6225 ", $Rm." # OpS # ", $Index",
6231 def EXTvvvi_8b : NeonI_Extract<0b0, 0b00, "ext", "8b",
6232 VPR64, neon_uimm3> {
6233 let Inst{14-11} = {0b0, Index{2}, Index{1}, Index{0}};
6236 def EXTvvvi_16b: NeonI_Extract<0b1, 0b00, "ext", "16b",
6237 VPR128, neon_uimm4> {
6238 let Inst{14-11} = Index;
6241 class NI_Extract<ValueType OpTy, RegisterOperand OpVPR, Instruction INST,
6243 : Pat<(OpTy (Neon_vextract (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm),
6245 (INST OpVPR:$Rn, OpVPR:$Rm, OpImm:$Imm)>;
// Selection patterns for EXT: every 64-bit vector type shares EXTvvvi_8b
// with a byte index (neon_uimm3), every 128-bit type shares EXTvvvi_16b
// (neon_uimm4) — the element size does not matter, only the byte offset.
6247 def : NI_Extract<v8i8, VPR64, EXTvvvi_8b, neon_uimm3>;
6248 def : NI_Extract<v4i16, VPR64, EXTvvvi_8b, neon_uimm3>;
6249 def : NI_Extract<v2i32, VPR64, EXTvvvi_8b, neon_uimm3>;
6250 def : NI_Extract<v1i64, VPR64, EXTvvvi_8b, neon_uimm3>;
6251 def : NI_Extract<v2f32, VPR64, EXTvvvi_8b, neon_uimm3>;
6252 def : NI_Extract<v1f64, VPR64, EXTvvvi_8b, neon_uimm3>;
6253 def : NI_Extract<v16i8, VPR128, EXTvvvi_16b, neon_uimm4>;
6254 def : NI_Extract<v8i16, VPR128, EXTvvvi_16b, neon_uimm4>;
6255 def : NI_Extract<v4i32, VPR128, EXTvvvi_16b, neon_uimm4>;
6256 def : NI_Extract<v2i64, VPR128, EXTvvvi_16b, neon_uimm4>;
6257 def : NI_Extract<v4f32, VPR128, EXTvvvi_16b, neon_uimm4>;
6258 def : NI_Extract<v2f64, VPR128, EXTvvvi_16b, neon_uimm4>;
6261 class NI_TBL<bit q, bits<2> op2, bits<2> len, bit op,
6262 string asmop, string OpS, RegisterOperand OpVPR,
6263 RegisterOperand VecList>
6264 : NeonI_TBL<q, op2, len, op,
6265 (outs OpVPR:$Rd), (ins VecList:$Rn, OpVPR:$Rm),
6266 asmop # "\t$Rd." # OpS # ", $Rn, $Rm." # OpS,
6270 // The vectors in the lookup table are always 16b.
6271 multiclass NI_TBL_pat<bits<2> len, bit op, string asmop, string List> {
6272 def _8b : NI_TBL<0, 0b00, len, op, asmop, "8b", VPR64,
6273 !cast<RegisterOperand>(List # "16B_operand")>;
6275 def _16b : NI_TBL<1, 0b00, len, op, asmop, "16b", VPR128,
6276 !cast<RegisterOperand>(List # "16B_operand")>;
// TBL with 1..4 consecutive 16B table registers; the `len` field encodes
// the list length and the VecList operand is picked by name in NI_TBL_pat.
6279 defm TBL1 : NI_TBL_pat<0b00, 0b0, "tbl", "VOne">;
6280 defm TBL2 : NI_TBL_pat<0b01, 0b0, "tbl", "VPair">;
6281 defm TBL3 : NI_TBL_pat<0b10, 0b0, "tbl", "VTriple">;
6282 defm TBL4 : NI_TBL_pat<0b11, 0b0, "tbl", "VQuad">;
6284 // Table lookup extension
6285 class NI_TBX<bit q, bits<2> op2, bits<2> len, bit op,
6286 string asmop, string OpS, RegisterOperand OpVPR,
6287 RegisterOperand VecList>
6288 : NeonI_TBL<q, op2, len, op,
6289 (outs OpVPR:$Rd), (ins OpVPR:$src, VecList:$Rn, OpVPR:$Rm),
6290 asmop # "\t$Rd." # OpS # ", $Rn, $Rm." # OpS,
6293 let Constraints = "$src = $Rd";
6296 // The vectors in the lookup table are always 16b.
6297 multiclass NI_TBX_pat<bits<2> len, bit op, string asmop, string List> {
6298 def _8b : NI_TBX<0, 0b00, len, op, asmop, "8b", VPR64,
6299 !cast<RegisterOperand>(List # "16B_operand")>;
6301 def _16b : NI_TBX<1, 0b00, len, op, asmop, "16b", VPR128,
6302 !cast<RegisterOperand>(List # "16B_operand")>;
// TBX (table lookup with destination preserved for out-of-range indices;
// NI_TBX ties $src to $Rd) with 1..4 consecutive 16B table registers.
6305 defm TBX1 : NI_TBX_pat<0b00, 0b1, "tbx", "VOne">;
6306 defm TBX2 : NI_TBX_pat<0b01, 0b1, "tbx", "VPair">;
6307 defm TBX3 : NI_TBX_pat<0b10, 0b1, "tbx", "VTriple">;
6308 defm TBX4 : NI_TBX_pat<0b11, 0b1, "tbx", "VQuad">;
6310 class NeonI_INS_main<string asmop, string Res, ValueType ResTy,
6311 RegisterClass OpGPR, ValueType OpTy, Operand OpImm>
6312 : NeonI_copy<0b1, 0b0, 0b0011,
6313 (outs VPR128:$Rd), (ins VPR128:$src, OpGPR:$Rn, OpImm:$Imm),
6314 asmop # "\t$Rd." # Res # "[$Imm], $Rn",
6315 [(set (ResTy VPR128:$Rd),
6316 (ResTy (vector_insert
6317 (ResTy VPR128:$src),
6322 let Constraints = "$src = $Rd";
6325 //Insert element (vector, from main)
6326 def INSbw : NeonI_INS_main<"ins", "b", v16i8, GPR32, i32,
6328 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
6330 def INShw : NeonI_INS_main<"ins", "h", v8i16, GPR32, i32,
6332 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
6334 def INSsw : NeonI_INS_main<"ins", "s", v4i32, GPR32, i32,
6336 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
6338 def INSdx : NeonI_INS_main<"ins", "d", v2i64, GPR64, i64,
6340 let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
// Parse-only "mov" spellings (emit priority 0) for INS element-from-GPR.
6343 def : NeonInstAlias<"mov $Rd.b[$Imm], $Rn",
6344 (INSbw VPR128:$Rd, GPR32:$Rn, neon_uimm4_bare:$Imm), 0>;
6345 def : NeonInstAlias<"mov $Rd.h[$Imm], $Rn",
6346 (INShw VPR128:$Rd, GPR32:$Rn, neon_uimm3_bare:$Imm), 0>;
6347 def : NeonInstAlias<"mov $Rd.s[$Imm], $Rn",
6348 (INSsw VPR128:$Rd, GPR32:$Rn, neon_uimm2_bare:$Imm), 0>;
6349 def : NeonInstAlias<"mov $Rd.d[$Imm], $Rn",
6350 (INSdx VPR128:$Rd, GPR64:$Rn, neon_uimm1_bare:$Imm), 0>;
6352 class Neon_INS_main_pattern <ValueType ResTy,ValueType ExtResTy,
6353 RegisterClass OpGPR, ValueType OpTy,
6354 Operand OpImm, Instruction INS>
6355 : Pat<(ResTy (vector_insert
6359 (ResTy (EXTRACT_SUBREG
6360 (ExtResTy (INS (ExtResTy (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
6361 OpGPR:$Rn, OpImm:$Imm)), sub_64))>;
// vector_insert into a 64-bit vector: Neon_INS_main_pattern (above) widens
// the source via SUBREG_TO_REG, performs the 128-bit INS, then extracts the
// low sub_64 half. One instantiation per element size.
6363 def INSbw_pattern : Neon_INS_main_pattern<v8i8, v16i8, GPR32, i32,
6364 neon_uimm3_bare, INSbw>;
6365 def INShw_pattern : Neon_INS_main_pattern<v4i16, v8i16, GPR32, i32,
6366 neon_uimm2_bare, INShw>;
6367 def INSsw_pattern : Neon_INS_main_pattern<v2i32, v4i32, GPR32, i32,
6368 neon_uimm1_bare, INSsw>;
6369 def INSdx_pattern : Neon_INS_main_pattern<v1i64, v2i64, GPR64, i64,
6370 neon_uimm0_bare, INSdx>;
6372 class NeonI_INS_element<string asmop, string Res, Operand ResImm>
6373 : NeonI_insert<0b1, 0b1,
6374 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn,
6375 ResImm:$Immd, ResImm:$Immn),
6376 asmop # "\t$Rd." # Res # "[$Immd], $Rn." # Res # "[$Immn]",
6379 let Constraints = "$src = $Rd";
6384 //Insert element (vector, from element)
6385 def INSELb : NeonI_INS_element<"ins", "b", neon_uimm4_bare> {
6386 let Inst{20-16} = {Immd{3}, Immd{2}, Immd{1}, Immd{0}, 0b1};
6387 let Inst{14-11} = {Immn{3}, Immn{2}, Immn{1}, Immn{0}};
6389 def INSELh : NeonI_INS_element<"ins", "h", neon_uimm3_bare> {
6390 let Inst{20-16} = {Immd{2}, Immd{1}, Immd{0}, 0b1, 0b0};
6391 let Inst{14-11} = {Immn{2}, Immn{1}, Immn{0}, 0b0};
6392 // bit 11 is unspecified, but should be set to zero.
6394 def INSELs : NeonI_INS_element<"ins", "s", neon_uimm2_bare> {
6395 let Inst{20-16} = {Immd{1}, Immd{0}, 0b1, 0b0, 0b0};
6396 let Inst{14-11} = {Immn{1}, Immn{0}, 0b0, 0b0};
6397 // bits 11-12 are unspecified, but should be set to zero.
6399 def INSELd : NeonI_INS_element<"ins", "d", neon_uimm1_bare> {
6400 let Inst{20-16} = {Immd, 0b1, 0b0, 0b0, 0b0};
6401 let Inst{14-11} = {Immn{0}, 0b0, 0b0, 0b0};
6402 // bits 11-13 are unspecified, but should be set to zero.
// Parse-only "mov" spellings (emit priority 0) for INS element-from-element.
6405 def : NeonInstAlias<"mov $Rd.b[$Immd], $Rn.b[$Immn]",
6406 (INSELb VPR128:$Rd, VPR128:$Rn,
6407 neon_uimm4_bare:$Immd, neon_uimm4_bare:$Immn), 0>;
6408 def : NeonInstAlias<"mov $Rd.h[$Immd], $Rn.h[$Immn]",
6409 (INSELh VPR128:$Rd, VPR128:$Rn,
6410 neon_uimm3_bare:$Immd, neon_uimm3_bare:$Immn), 0>;
6411 def : NeonInstAlias<"mov $Rd.s[$Immd], $Rn.s[$Immn]",
6412 (INSELs VPR128:$Rd, VPR128:$Rn,
6413 neon_uimm2_bare:$Immd, neon_uimm2_bare:$Immn), 0>;
6414 def : NeonInstAlias<"mov $Rd.d[$Immd], $Rn.d[$Immn]",
6415 (INSELd VPR128:$Rd, VPR128:$Rn,
6416 neon_uimm1_bare:$Immd, neon_uimm1_bare:$Immn), 0>;
6418 multiclass Neon_INS_elt_pattern<ValueType ResTy, ValueType NaTy,
6419 ValueType MidTy, Operand StImm, Operand NaImm,
6421 def : Pat<(ResTy (vector_insert
6422 (ResTy VPR128:$src),
6423 (MidTy (vector_extract
6427 (INS (ResTy VPR128:$src), (ResTy VPR128:$Rn),
6428 StImm:$Immd, StImm:$Immn)>;
6430 def : Pat <(ResTy (vector_insert
6431 (ResTy VPR128:$src),
6432 (MidTy (vector_extract
6436 (INS (ResTy VPR128:$src),
6437 (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$Rn), sub_64)),
6438 StImm:$Immd, NaImm:$Immn)>;
6440 def : Pat <(NaTy (vector_insert
6442 (MidTy (vector_extract
6446 (NaTy (EXTRACT_SUBREG
6448 (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)),
6450 NaImm:$Immd, StImm:$Immn)),
6453 def : Pat <(NaTy (vector_insert
6455 (MidTy (vector_extract
6459 (NaTy (EXTRACT_SUBREG
6461 (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)),
6462 (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$Rn), sub_64)),
6463 NaImm:$Immd, NaImm:$Immn)),
// Element-to-element insert patterns for every element type; each pairs a
// 128-bit vector type with its 64-bit counterpart and the two lane-index
// operand widths (128-bit index first, 64-bit "natural" index second).
6467 defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, neon_uimm2_bare,
6468 neon_uimm1_bare, INSELs>;
6469 defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, neon_uimm1_bare,
6470 neon_uimm0_bare, INSELd>;
6471 defm : Neon_INS_elt_pattern<v16i8, v8i8, i32, neon_uimm4_bare,
6472 neon_uimm3_bare, INSELb>;
6473 defm : Neon_INS_elt_pattern<v8i16, v4i16, i32, neon_uimm3_bare,
6474 neon_uimm2_bare, INSELh>;
6475 defm : Neon_INS_elt_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
6476 neon_uimm1_bare, INSELs>;
6477 defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, neon_uimm1_bare,
6478 neon_uimm0_bare, INSELd>;
6480 multiclass Neon_INS_elt_float_pattern<ValueType ResTy, ValueType NaTy,
6482 RegisterClass OpFPR, Operand ResImm,
6483 SubRegIndex SubIndex, Instruction INS> {
6484 def : Pat <(ResTy (vector_insert
6485 (ResTy VPR128:$src),
6488 (INS (ResTy VPR128:$src),
6489 (ResTy (SUBREG_TO_REG (i64 0), OpFPR:$Rn, SubIndex)),
6493 def : Pat <(NaTy (vector_insert
6497 (NaTy (EXTRACT_SUBREG
6499 (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)),
6500 (ResTy (SUBREG_TO_REG (i64 0), (MidTy OpFPR:$Rn), SubIndex)),
6506 defm : Neon_INS_elt_float_pattern<v4f32, v2f32, f32, FPR32, neon_uimm2_bare,
6508 defm : Neon_INS_elt_float_pattern<v2f64, v1f64, f64, FPR64, neon_uimm1_bare,
6511 class NeonI_SMOV<string asmop, string Res, bit Q,
6512 ValueType OpTy, ValueType eleTy,
6513 Operand OpImm, RegisterClass ResGPR, ValueType ResTy>
6514 : NeonI_copy<Q, 0b0, 0b0101,
6515 (outs ResGPR:$Rd), (ins VPR128:$Rn, OpImm:$Imm),
6516 asmop # "\t$Rd, $Rn." # Res # "[$Imm]",
6517 [(set (ResTy ResGPR:$Rd),
6519 (ResTy (vector_extract
6520 (OpTy VPR128:$Rn), (OpImm:$Imm))),
6526 //Signed integer move (main, from element)
6527 def SMOVwb : NeonI_SMOV<"smov", "b", 0b0, v16i8, i8, neon_uimm4_bare,
6529 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
6531 def SMOVwh : NeonI_SMOV<"smov", "h", 0b0, v8i16, i16, neon_uimm3_bare,
6533 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
6535 def SMOVxb : NeonI_SMOV<"smov", "b", 0b1, v16i8, i8, neon_uimm4_bare,
6537 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
6539 def SMOVxh : NeonI_SMOV<"smov", "h", 0b1, v8i16, i16, neon_uimm3_bare,
6541 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
6543 def SMOVxs : NeonI_SMOV<"smov", "s", 0b1, v4i32, i32, neon_uimm2_bare,
6545 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
6548 multiclass Neon_SMOVx_pattern <ValueType StTy, ValueType NaTy,
6549 ValueType eleTy, Operand StImm, Operand NaImm,
6550 Instruction SMOVI> {
6551 def : Pat<(i64 (sext_inreg
6553 (i32 (vector_extract
6554 (StTy VPR128:$Rn), (StImm:$Imm))))),
6556 (SMOVI VPR128:$Rn, StImm:$Imm)>;
6558 def : Pat<(i64 (sext
6559 (i32 (vector_extract
6560 (StTy VPR128:$Rn), (StImm:$Imm))))),
6561 (SMOVI VPR128:$Rn, StImm:$Imm)>;
6563 def : Pat<(i64 (sext_inreg
6564 (i64 (vector_extract
6565 (NaTy VPR64:$Rn), (NaImm:$Imm))),
6567 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
6570 def : Pat<(i64 (sext_inreg
6572 (i32 (vector_extract
6573 (NaTy VPR64:$Rn), (NaImm:$Imm))))),
6575 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
6578 def : Pat<(i64 (sext
6579 (i32 (vector_extract
6580 (NaTy VPR64:$Rn), (NaImm:$Imm))))),
6581 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
// 64-bit-destination SMOV patterns (sign-extended lane extracts to GPR64)
// for byte, halfword and word lanes; see Neon_SMOVx_pattern above.
6585 defm : Neon_SMOVx_pattern<v16i8, v8i8, i8, neon_uimm4_bare,
6586 neon_uimm3_bare, SMOVxb>;
6587 defm : Neon_SMOVx_pattern<v8i16, v4i16, i16, neon_uimm3_bare,
6588 neon_uimm2_bare, SMOVxh>;
6589 defm : Neon_SMOVx_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
6590 neon_uimm1_bare, SMOVxs>;
6592 class Neon_SMOVw_pattern <ValueType StTy, ValueType NaTy,
6593 ValueType eleTy, Operand StImm, Operand NaImm,
6595 : Pat<(i32 (sext_inreg
6596 (i32 (vector_extract
6597 (NaTy VPR64:$Rn), (NaImm:$Imm))),
6599 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
// 32-bit-destination SMOV patterns operating on the 64-bit ("natural")
// vector forms; the class widens VPR64 via SUBREG_TO_REG before the SMOV.
6602 def : Neon_SMOVw_pattern<v16i8, v8i8, i8, neon_uimm4_bare,
6603 neon_uimm3_bare, SMOVwb>;
6604 def : Neon_SMOVw_pattern<v8i16, v4i16, i16, neon_uimm3_bare,
6605 neon_uimm2_bare, SMOVwh>;
6607 class NeonI_UMOV<string asmop, string Res, bit Q,
6608 ValueType OpTy, Operand OpImm,
6609 RegisterClass ResGPR, ValueType ResTy>
6610 : NeonI_copy<Q, 0b0, 0b0111,
6611 (outs ResGPR:$Rd), (ins VPR128:$Rn, OpImm:$Imm),
6612 asmop # "\t$Rd, $Rn." # Res # "[$Imm]",
6613 [(set (ResTy ResGPR:$Rd),
6614 (ResTy (vector_extract
6615 (OpTy VPR128:$Rn), (OpImm:$Imm))))],
6620 //Unsigned integer move (main, from element)
6621 def UMOVwb : NeonI_UMOV<"umov", "b", 0b0, v16i8, neon_uimm4_bare,
6623 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
6625 def UMOVwh : NeonI_UMOV<"umov", "h", 0b0, v8i16, neon_uimm3_bare,
6627 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
6629 def UMOVws : NeonI_UMOV<"umov", "s", 0b0, v4i32, neon_uimm2_bare,
6631 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
6633 def UMOVxd : NeonI_UMOV<"umov", "d", 0b1, v2i64, neon_uimm1_bare,
6635 let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
// Parse-only "mov" spellings (emit priority 0) for UMOV on s/d lanes.
6638 def : NeonInstAlias<"mov $Rd, $Rn.s[$Imm]",
6639 (UMOVws GPR32:$Rd, VPR128:$Rn, neon_uimm2_bare:$Imm), 0>;
6640 def : NeonInstAlias<"mov $Rd, $Rn.d[$Imm]",
6641 (UMOVxd GPR64:$Rd, VPR128:$Rn, neon_uimm1_bare:$Imm), 0>;
6643 class Neon_UMOV_pattern <ValueType StTy, ValueType NaTy, ValueType ResTy,
6644 Operand StImm, Operand NaImm,
6646 : Pat<(ResTy (vector_extract
6647 (NaTy VPR64:$Rn), NaImm:$Imm)),
6648 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
// UMOV patterns for lane extracts from the 64-bit ("natural") vector forms;
// the class widens VPR64 via SUBREG_TO_REG before the 128-bit UMOV.
6651 def : Neon_UMOV_pattern<v16i8, v8i8, i32, neon_uimm4_bare,
6652 neon_uimm3_bare, UMOVwb>;
6653 def : Neon_UMOV_pattern<v8i16, v4i16, i32, neon_uimm3_bare,
6654 neon_uimm2_bare, UMOVwh>;
6655 def : Neon_UMOV_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
6656 neon_uimm1_bare, UMOVws>;
6659 (i32 (vector_extract
6660 (v16i8 VPR128:$Rn), (neon_uimm4_bare:$Imm))),
6662 (UMOVwb VPR128:$Rn, neon_uimm4_bare:$Imm)>;
6665 (i32 (vector_extract
6666 (v8i16 VPR128:$Rn), (neon_uimm3_bare:$Imm))),
6668 (UMOVwh VPR128:$Rn, neon_uimm3_bare:$Imm)>;
6670 def : Pat<(i64 (zext
6671 (i32 (vector_extract
6672 (v2i64 VPR128:$Rn), (neon_uimm1_bare:$Imm))))),
6673 (UMOVxd VPR128:$Rn, neon_uimm1_bare:$Imm)>;
6676 (i32 (vector_extract
6677 (v8i8 VPR64:$Rn), (neon_uimm3_bare:$Imm))),
6679 (UMOVwb (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
6680 neon_uimm3_bare:$Imm)>;
6683 (i32 (vector_extract
6684 (v4i16 VPR64:$Rn), (neon_uimm2_bare:$Imm))),
6686 (UMOVwh (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
6687 neon_uimm2_bare:$Imm)>;
6689 def : Pat<(i64 (zext
6690 (i32 (vector_extract
6691 (v1i64 VPR64:$Rn), (neon_uimm0_bare:$Imm))))),
6692 (UMOVxd (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
6693 neon_uimm0_bare:$Imm)>;
6695 // Additional copy patterns for scalar types
6696 def : Pat<(i32 (vector_extract (v1i8 FPR8:$Rn), (i64 0))),
6698 (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8)), (i64 0))>;
6700 def : Pat<(i32 (vector_extract (v1i16 FPR16:$Rn), (i64 0))),
6702 (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16)), (i64 0))>;
// Extracting lane 0 of a one-element vector is just a cross-bank FMOV
// (FPR -> GPR); no lane arithmetic needed.
6704 def : Pat<(i32 (vector_extract (v1i32 FPR32:$Rn), (i64 0))),
6705 (FMOVws FPR32:$Rn)>;
6707 def : Pat<(i64 (vector_extract (v1i64 FPR64:$Rn), (i64 0))),
6708 (FMOVxd FPR64:$Rn)>;
6710 def : Pat<(f64 (vector_extract (v1f64 FPR64:$Rn), (i64 0))),
6713 def : Pat<(v1i8 (scalar_to_vector GPR32:$Rn)),
6714 (v1i8 (EXTRACT_SUBREG (v16i8
6715 (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))),
6718 def : Pat<(v1i16 (scalar_to_vector GPR32:$Rn)),
6719 (v1i16 (EXTRACT_SUBREG (v8i16
6720 (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))),
6723 def : Pat<(v1i32 (scalar_to_vector GPR32:$src)),
6726 def : Pat<(v1i64 (scalar_to_vector GPR64:$src)),
6729 def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)),
6730 (v8i8 (EXTRACT_SUBREG (v16i8
6731 (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))),
6734 def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)),
6735 (v4i16 (EXTRACT_SUBREG (v8i16
6736 (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))),
6739 def : Pat<(v2i32 (scalar_to_vector GPR32:$Rn)),
6740 (v2i32 (EXTRACT_SUBREG (v16i8
6741 (INSsw (v4i32 (IMPLICIT_DEF)), $Rn, (i64 0))),
// scalar_to_vector into a 128-bit type: INS the GPR value into lane 0 of an
// IMPLICIT_DEF vector (remaining lanes are undefined, which matches the
// scalar_to_vector contract).
6744 def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)),
6745 (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))>;
6747 def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)),
6748 (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))>;
6750 def : Pat<(v4i32 (scalar_to_vector GPR32:$Rn)),
6751 (INSsw (v4i32 (IMPLICIT_DEF)), $Rn, (i64 0))>;
6753 def : Pat<(v2i64 (scalar_to_vector GPR64:$Rn)),
6754 (INSdx (v2i64 (IMPLICIT_DEF)), $Rn, (i64 0))>;
6756 def : Pat<(v2i32 (scalar_to_vector GPR32:$Rn)),
6757 (v2i32 (EXTRACT_SUBREG (v16i8
6758 (INSsw (v4i32 (IMPLICIT_DEF)), $Rn, (i64 0))),
6761 def : Pat<(v2i32 (scalar_to_vector GPR32:$Rn)),
6762 (v2i32 (EXTRACT_SUBREG (v16i8
6763 (INSsw (v4i32 (IMPLICIT_DEF)), $Rn, (i64 0))),
6766 def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Rn))),
// f64 -> v2f64: place the FPR64 value in the low sub_64 half of an
// IMPLICIT_DEF 128-bit register; the high lane stays undefined.
6769 def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$src))),
6770 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)),
6771 (f64 FPR64:$src), sub_64)>;
6773 class NeonI_DUP_Elt<bit Q, string asmop, string rdlane, string rnlane,
6774 RegisterOperand ResVPR, Operand OpImm>
6775 : NeonI_copy<Q, 0b0, 0b0000, (outs ResVPR:$Rd),
6776 (ins VPR128:$Rn, OpImm:$Imm),
6777 asmop # "\t$Rd" # rdlane # ", $Rn" # rnlane # "[$Imm]",
6783 def DUPELT16b : NeonI_DUP_Elt<0b1, "dup", ".16b", ".b", VPR128,
6785 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
6788 def DUPELT8h : NeonI_DUP_Elt<0b1, "dup", ".8h", ".h", VPR128,
6790 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
6793 def DUPELT4s : NeonI_DUP_Elt<0b1, "dup", ".4s", ".s", VPR128,
6795 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
6798 def DUPELT2d : NeonI_DUP_Elt<0b1, "dup", ".2d", ".d", VPR128,
6800 let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
6803 def DUPELT8b : NeonI_DUP_Elt<0b0, "dup", ".8b", ".b", VPR64,
6805 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
6808 def DUPELT4h : NeonI_DUP_Elt<0b0, "dup", ".4h", ".h", VPR64,
6810 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
6813 def DUPELT2s : NeonI_DUP_Elt<0b0, "dup", ".2s", ".s", VPR64,
6815 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
6818 multiclass NeonI_DUP_Elt_pattern<Instruction DUPELT, ValueType ResTy,
6819 ValueType OpTy,ValueType NaTy,
6820 ValueType ExTy, Operand OpLImm,
6822 def : Pat<(ResTy (Neon_vduplane (OpTy VPR128:$Rn), OpLImm:$Imm)),
6823 (ResTy (DUPELT (OpTy VPR128:$Rn), OpLImm:$Imm))>;
6825 def : Pat<(ResTy (Neon_vduplane
6826 (NaTy VPR64:$Rn), OpNImm:$Imm)),
6828 (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), OpNImm:$Imm))>;
// Neon_vduplane selection for every DUP-element instruction; each
// instantiation also covers a 64-bit source vector (widened via
// SUBREG_TO_REG in the multiclass above) with the narrower index operand.
6830 defm : NeonI_DUP_Elt_pattern<DUPELT16b, v16i8, v16i8, v8i8, v16i8,
6831 neon_uimm4_bare, neon_uimm3_bare>;
6832 defm : NeonI_DUP_Elt_pattern<DUPELT8b, v8i8, v16i8, v8i8, v16i8,
6833 neon_uimm4_bare, neon_uimm3_bare>;
6834 defm : NeonI_DUP_Elt_pattern<DUPELT8h, v8i16, v8i16, v4i16, v8i16,
6835 neon_uimm3_bare, neon_uimm2_bare>;
6836 defm : NeonI_DUP_Elt_pattern<DUPELT4h, v4i16, v8i16, v4i16, v8i16,
6837 neon_uimm3_bare, neon_uimm2_bare>;
6838 defm : NeonI_DUP_Elt_pattern<DUPELT4s, v4i32, v4i32, v2i32, v4i32,
6839 neon_uimm2_bare, neon_uimm1_bare>;
6840 defm : NeonI_DUP_Elt_pattern<DUPELT2s, v2i32, v4i32, v2i32, v4i32,
6841 neon_uimm2_bare, neon_uimm1_bare>;
6842 defm : NeonI_DUP_Elt_pattern<DUPELT2d, v2i64, v2i64, v1i64, v2i64,
6843 neon_uimm1_bare, neon_uimm0_bare>;
6844 defm : NeonI_DUP_Elt_pattern<DUPELT4s, v4f32, v4f32, v2f32, v4f32,
6845 neon_uimm2_bare, neon_uimm1_bare>;
6846 defm : NeonI_DUP_Elt_pattern<DUPELT2s, v2f32, v4f32, v2f32, v4f32,
6847 neon_uimm2_bare, neon_uimm1_bare>;
6848 defm : NeonI_DUP_Elt_pattern<DUPELT2d, v2f64, v2f64, v1f64, v2f64,
6849 neon_uimm1_bare, neon_uimm0_bare>;
6851 def : Pat<(v2f32 (Neon_vdup (f32 FPR32:$Rn))),
6853 (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
6855 def : Pat<(v4f32 (Neon_vdup (f32 FPR32:$Rn))),
6857 (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
6859 def : Pat<(v2f64 (Neon_vdup (f64 FPR64:$Rn))),
6861 (SUBREG_TO_REG (i64 0), FPR64:$Rn, sub_64),
6864 class NeonI_DUP<bit Q, string asmop, string rdlane,
6865 RegisterOperand ResVPR, ValueType ResTy,
6866 RegisterClass OpGPR, ValueType OpTy>
6867 : NeonI_copy<Q, 0b0, 0b0001, (outs ResVPR:$Rd), (ins OpGPR:$Rn),
6868 asmop # "\t$Rd" # rdlane # ", $Rn",
6869 [(set (ResTy ResVPR:$Rd),
6870 (ResTy (Neon_vdup (OpTy OpGPR:$Rn))))],
6873 def DUP16b : NeonI_DUP<0b1, "dup", ".16b", VPR128, v16i8, GPR32, i32> {
6874 let Inst{20-16} = 0b00001;
6875 // bits 17-20 are unspecified, but should be set to zero.
6878 def DUP8h : NeonI_DUP<0b1, "dup", ".8h", VPR128, v8i16, GPR32, i32> {
6879 let Inst{20-16} = 0b00010;
6880 // bits 18-20 are unspecified, but should be set to zero.
6883 def DUP4s : NeonI_DUP<0b1, "dup", ".4s", VPR128, v4i32, GPR32, i32> {
6884 let Inst{20-16} = 0b00100;
6885 // bits 19-20 are unspecified, but should be set to zero.
6888 def DUP2d : NeonI_DUP<0b1, "dup", ".2d", VPR128, v2i64, GPR64, i64> {
6889 let Inst{20-16} = 0b01000;
6890 // bit 20 is unspecified, but should be set to zero.
6893 def DUP8b : NeonI_DUP<0b0, "dup", ".8b", VPR64, v8i8, GPR32, i32> {
6894 let Inst{20-16} = 0b00001;
6895 // bits 17-20 are unspecified, but should be set to zero.
6898 def DUP4h : NeonI_DUP<0b0, "dup", ".4h", VPR64, v4i16, GPR32, i32> {
6899 let Inst{20-16} = 0b00010;
6900 // bits 18-20 are unspecified, but should be set to zero.
6903 def DUP2s : NeonI_DUP<0b0, "dup", ".2s", VPR64, v2i32, GPR32, i32> {
6904 let Inst{20-16} = 0b00100;
6905 // bits 19-20 are unspecified, but should be set to zero.
6908 // patterns for CONCAT_VECTORS
6909 multiclass Concat_Vector_Pattern<ValueType ResTy, ValueType OpTy> {
6910 def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), undef)),
6911 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)>;
6912 def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))),
6914 (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
6915 (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rm, sub_64)),
6918 def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), (OpTy VPR64:$Rn))),
6920 (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
// Instantiate the CONCAT_VECTORS patterns for every 64-bit element vector
// type and its corresponding 128-bit concatenated result type.
6924 defm : Concat_Vector_Pattern<v16i8, v8i8>;
6925 defm : Concat_Vector_Pattern<v8i16, v4i16>;
6926 defm : Concat_Vector_Pattern<v4i32, v2i32>;
6927 defm : Concat_Vector_Pattern<v2i64, v1i64>;
6928 defm : Concat_Vector_Pattern<v4f32, v2f32>;
6929 defm : Concat_Vector_Pattern<v2f64, v1f64>;
6931 // Patterns for EXTRACT_SUBVECTOR: extracting the low half (index 0) of a
// 128-bit vector is just a sub_64 subregister read, so no instruction is
// emitted. (Only index 0 is handled here; high-half extraction is not.)
6932 def : Pat<(v8i8 (extract_subvector (v16i8 VPR128:$Rn), (i64 0))),
6933 (v8i8 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
6934 def : Pat<(v4i16 (extract_subvector (v8i16 VPR128:$Rn), (i64 0))),
6935 (v4i16 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
6936 def : Pat<(v2i32 (extract_subvector (v4i32 VPR128:$Rn), (i64 0))),
6937 (v2i32 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
6938 def : Pat<(v1i64 (extract_subvector (v2i64 VPR128:$Rn), (i64 0))),
6939 (v1i64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
6940 def : Pat<(v2f32 (extract_subvector (v4f32 VPR128:$Rn), (i64 0))),
6941 (v2f32 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
6942 def : Pat<(v1f64 (extract_subvector (v2f64 VPR128:$Rn), (i64 0))),
6943 (v1f64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
6945 // The followings are for instruction class (3V Elem)
6949 class NI_2VE<bit q, bit u, bits<2> size, bits<4> opcode,
6950 string asmop, string ResS, string OpS, string EleOpS,
6951 Operand OpImm, RegisterOperand ResVPR,
6952 RegisterOperand OpVPR, RegisterOperand EleOpVPR>
6953 : NeonI_2VElem<q, u, size, opcode,
6954 (outs ResVPR:$Rd), (ins ResVPR:$src, OpVPR:$Rn,
6955 EleOpVPR:$Re, OpImm:$Index),
6956 asmop # "\t$Rd." # ResS # ", $Rn." # OpS #
6957 ", $Re." # EleOpS # "[$Index]",
6963 let Constraints = "$src = $Rd";
6966 multiclass NI_2VE_v1<bit u, bits<4> opcode, string asmop> {
6967 // vector register class for element is always 128-bit to cover the max index
6968 def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
6969 neon_uimm2_bare, VPR64, VPR64, VPR128> {
6970 let Inst{11} = {Index{1}};
6971 let Inst{21} = {Index{0}};
6972 let Inst{20-16} = Re;
6975 def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
6976 neon_uimm2_bare, VPR128, VPR128, VPR128> {
6977 let Inst{11} = {Index{1}};
6978 let Inst{21} = {Index{0}};
6979 let Inst{20-16} = Re;
6982 // Index operations on 16-bit(H) elements are restricted to using v0-v15.
6983 def _4h8h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h",
6984 neon_uimm3_bare, VPR64, VPR64, VPR128Lo> {
6985 let Inst{11} = {Index{2}};
6986 let Inst{21} = {Index{1}};
6987 let Inst{20} = {Index{0}};
6988 let Inst{19-16} = Re{3-0};
6991 def _8h8h : NI_2VE<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h",
6992 neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
6993 let Inst{11} = {Index{2}};
6994 let Inst{21} = {Index{1}};
6995 let Inst{20} = {Index{0}};
6996 let Inst{19-16} = Re{3-0};
7000 defm MLAvve : NI_2VE_v1<0b1, 0b0000, "mla">;
7001 defm MLSvve : NI_2VE_v1<0b1, 0b0100, "mls">;
7003 // Pattern for lane in 128-bit vector
7004 class NI_2VE_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
7005 RegisterOperand ResVPR, RegisterOperand OpVPR,
7006 RegisterOperand EleOpVPR, ValueType ResTy, ValueType OpTy,
7008 : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn),
7009 (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
7010 (INST ResVPR:$src, OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
7012 // Pattern for lane in 64-bit vector
7013 class NI_2VE_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
7014 RegisterOperand ResVPR, RegisterOperand OpVPR,
7015 RegisterOperand EleOpVPR, ValueType ResTy, ValueType OpTy,
7017 : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn),
7018 (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
7019 (INST ResVPR:$src, OpVPR:$Rn,
7020 (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
7022 multiclass NI_2VE_v1_pat<string subop, SDPatternOperator op>
7024 def : NI_2VE_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
7025 op, VPR64, VPR64, VPR128, v2i32, v2i32, v4i32>;
7027 def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
7028 op, VPR128, VPR128, VPR128, v4i32, v4i32, v4i32>;
7030 def : NI_2VE_laneq<!cast<Instruction>(subop # "_4h8h"), neon_uimm3_bare,
7031 op, VPR64, VPR64, VPR128Lo, v4i16, v4i16, v8i16>;
7033 def : NI_2VE_laneq<!cast<Instruction>(subop # "_8h8h"), neon_uimm3_bare,
7034 op, VPR128, VPR128, VPR128Lo, v8i16, v8i16, v8i16>;
7036 // Index can only be half of the max value for lane in 64-bit vector
7038 def : NI_2VE_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
7039 op, VPR64, VPR64, VPR64, v2i32, v2i32, v2i32>;
7041 def : NI_2VE_lane<!cast<Instruction>(subop # "_4h8h"), neon_uimm2_bare,
7042 op, VPR64, VPR64, VPR64Lo, v4i16, v4i16, v4i16>;
// Lane-indexed selection patterns for the MLA/MLS by-element instructions
// defined above.
7045 defm MLA_lane_v1 : NI_2VE_v1_pat<"MLAvve", Neon_mla>;
7046 defm MLS_lane_v1 : NI_2VE_v1_pat<"MLSvve", Neon_mls>;
7048 class NI_2VE_2op<bit q, bit u, bits<2> size, bits<4> opcode,
7049 string asmop, string ResS, string OpS, string EleOpS,
7050 Operand OpImm, RegisterOperand ResVPR,
7051 RegisterOperand OpVPR, RegisterOperand EleOpVPR>
7052 : NeonI_2VElem<q, u, size, opcode,
7053 (outs ResVPR:$Rd), (ins OpVPR:$Rn,
7054 EleOpVPR:$Re, OpImm:$Index),
7055 asmop # "\t$Rd." # ResS # ", $Rn." # OpS #
7056 ", $Re." # EleOpS # "[$Index]",
7063 multiclass NI_2VE_v1_2op<bit u, bits<4> opcode, string asmop> {
7064 // vector register class for element is always 128-bit to cover the max index
7065 def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
7066 neon_uimm2_bare, VPR64, VPR64, VPR128> {
7067 let Inst{11} = {Index{1}};
7068 let Inst{21} = {Index{0}};
7069 let Inst{20-16} = Re;
7072 def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
7073 neon_uimm2_bare, VPR128, VPR128, VPR128> {
7074 let Inst{11} = {Index{1}};
7075 let Inst{21} = {Index{0}};
7076 let Inst{20-16} = Re;
7079 // Index operations on 16-bit(H) elements are restricted to using v0-v15.
7080 def _4h8h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h",
7081 neon_uimm3_bare, VPR64, VPR64, VPR128Lo> {
7082 let Inst{11} = {Index{2}};
7083 let Inst{21} = {Index{1}};
7084 let Inst{20} = {Index{0}};
7085 let Inst{19-16} = Re{3-0};
7088 def _8h8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h",
7089 neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
7090 let Inst{11} = {Index{2}};
7091 let Inst{21} = {Index{1}};
7092 let Inst{20} = {Index{0}};
7093 let Inst{19-16} = Re{3-0};
// Two-operand (no accumulator) integer multiply by element:
// MUL, SQDMULH, SQRDMULH.
7097 defm MULve : NI_2VE_v1_2op<0b0, 0b1000, "mul">;
7098 defm SQDMULHve : NI_2VE_v1_2op<0b0, 0b1100, "sqdmulh">;
7099 defm SQRDMULHve : NI_2VE_v1_2op<0b0, 0b1101, "sqrdmulh">;
7101 // Pattern for lane in 128-bit vector
// Two-operand by-element form (no accumulator): matches `op` applied to Rn
// and a lane-duplicated element of the 128-bit vector Re, and selects the
// by-element instruction with Re and the lane index used directly.
7102 class NI_2VE_mul_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
7103 RegisterOperand OpVPR, RegisterOperand EleOpVPR,
7104 ValueType ResTy, ValueType OpTy, ValueType EleOpTy>
7105 : Pat<(ResTy (op (OpTy OpVPR:$Rn),
7106 (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
7107 (INST OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
7109 // Pattern for lane in 64-bit vector
7110 class NI_2VE_mul_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
7111 RegisterOperand OpVPR, RegisterOperand EleOpVPR,
7112 ValueType ResTy, ValueType OpTy, ValueType EleOpTy>
7113 : Pat<(ResTy (op (OpTy OpVPR:$Rn),
7114 (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
7116 (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
7118 multiclass NI_2VE_mul_v1_pat<string subop, SDPatternOperator op> {
7119 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
7120 op, VPR64, VPR128, v2i32, v2i32, v4i32>;
7122 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
7123 op, VPR128, VPR128, v4i32, v4i32, v4i32>;
7125 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4h8h"), neon_uimm3_bare,
7126 op, VPR64, VPR128Lo, v4i16, v4i16, v8i16>;
7128 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_8h8h"), neon_uimm3_bare,
7129 op, VPR128, VPR128Lo, v8i16, v8i16, v8i16>;
7131 // Index can only be half of the max value for lane in 64-bit vector
7133 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
7134 op, VPR64, VPR64, v2i32, v2i32, v2i32>;
7136 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4h8h"), neon_uimm2_bare,
7137 op, VPR64, VPR64Lo, v4i16, v4i16, v4i16>;
// Lane-indexed selection patterns for MUL/SQDMULH/SQRDMULH by element.
7140 defm MUL_lane_v1 : NI_2VE_mul_v1_pat<"MULve", mul>;
7141 defm SQDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQDMULHve", int_arm_neon_vqdmulh>;
7142 defm SQRDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQRDMULHve", int_arm_neon_vqrdmulh>;
7146 multiclass NI_2VE_v2_2op<bit u, bits<4> opcode, string asmop> {
7147 // vector register class for element is always 128-bit to cover the max index
7148 def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
7149 neon_uimm2_bare, VPR64, VPR64, VPR128> {
7150 let Inst{11} = {Index{1}};
7151 let Inst{21} = {Index{0}};
7152 let Inst{20-16} = Re;
7155 def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
7156 neon_uimm2_bare, VPR128, VPR128, VPR128> {
7157 let Inst{11} = {Index{1}};
7158 let Inst{21} = {Index{0}};
7159 let Inst{20-16} = Re;
7162 // _1d2d doesn't exist!
7164 def _2d2d : NI_2VE_2op<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d",
7165 neon_uimm1_bare, VPR128, VPR128, VPR128> {
7166 let Inst{11} = {Index{0}};
7168 let Inst{20-16} = Re;
// Floating-point multiply and multiply-extended by element.
7172 defm FMULve : NI_2VE_v2_2op<0b0, 0b1001, "fmul">;
7173 defm FMULXve : NI_2VE_v2_2op<0b1, 0b1001, "fmulx">;
7175 class NI_2VE_mul_lane_2d<Instruction INST, Operand OpImm, SDPatternOperator op,
7176 RegisterOperand OpVPR, RegisterOperand EleOpVPR,
7177 ValueType ResTy, ValueType OpTy, ValueType EleOpTy,
7178 SDPatternOperator coreop>
7179 : Pat<(ResTy (op (OpTy OpVPR:$Rn),
7180 (OpTy (coreop (EleOpTy EleOpVPR:$Re), (EleOpTy EleOpVPR:$Re))))),
7182 (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), 0)>;
7184 multiclass NI_2VE_mul_v2_pat<string subop, SDPatternOperator op> {
7185 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
7186 op, VPR64, VPR128, v2f32, v2f32, v4f32>;
7188 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
7189 op, VPR128, VPR128, v4f32, v4f32, v4f32>;
7191 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2d2d"), neon_uimm1_bare,
7192 op, VPR128, VPR128, v2f64, v2f64, v2f64>;
7194 // Index can only be half of the max value for lane in 64-bit vector
7196 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
7197 op, VPR64, VPR64, v2f32, v2f32, v2f32>;
7199 def : NI_2VE_mul_lane_2d<!cast<Instruction>(subop # "_2d2d"), neon_uimm1_bare,
7200 op, VPR128, VPR64, v2f64, v2f64, v1f64,
7201 BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>;
// Lane-indexed selection patterns for FMUL/FMULX by element.
7204 defm FMUL_lane_v2 : NI_2VE_mul_v2_pat<"FMULve", fmul>;
7205 defm FMULX_lane_v2 : NI_2VE_mul_v2_pat<"FMULXve", int_aarch64_neon_vmulx>;
// fmul by a splatted FP scalar selects to FMUL (by element) at lane 0: the
// scalar FPR is placed into a vector register via SUBREG_TO_REG and the
// instruction reads index 0.
7207 def : Pat<(v2f32 (fmul (v2f32 (Neon_vdup (f32 FPR32:$Re))),
7208 (v2f32 VPR64:$Rn))),
7209 (FMULve_2s4s VPR64:$Rn, (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>;
7211 def : Pat<(v4f32 (fmul (v4f32 (Neon_vdup (f32 FPR32:$Re))),
7212 (v4f32 VPR128:$Rn))),
7213 (FMULve_4s4s VPR128:$Rn, (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>;
7215 def : Pat<(v2f64 (fmul (v2f64 (Neon_vdup (f64 FPR64:$Re))),
7216 (v2f64 VPR128:$Rn))),
7217 (FMULve_2d2d VPR128:$Rn, (SUBREG_TO_REG (i64 0), $Re, sub_64), 0)>;
7219 // The followings are patterns using fma
7220 // -ffp-contract=fast generates fma
7222 multiclass NI_2VE_v2<bit u, bits<4> opcode, string asmop> {
7223 // vector register class for element is always 128-bit to cover the max index
7224 def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
7225 neon_uimm2_bare, VPR64, VPR64, VPR128> {
7226 let Inst{11} = {Index{1}};
7227 let Inst{21} = {Index{0}};
7228 let Inst{20-16} = Re;
7231 def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
7232 neon_uimm2_bare, VPR128, VPR128, VPR128> {
7233 let Inst{11} = {Index{1}};
7234 let Inst{21} = {Index{0}};
7235 let Inst{20-16} = Re;
7238 // _1d2d doesn't exist!
7240 def _2d2d : NI_2VE<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d",
7241 neon_uimm1_bare, VPR128, VPR128, VPR128> {
7242 let Inst{11} = {Index{0}};
7244 let Inst{20-16} = Re;
// Floating-point fused multiply-add/subtract by element.
7248 defm FMLAvve : NI_2VE_v2<0b0, 0b0001, "fmla">;
7249 defm FMLSvve : NI_2VE_v2<0b0, 0b0101, "fmls">;
7251 // Pattern for lane in 128-bit vector
// "swap" variant for fma-style ops: the lane-duplicated multiplicand
// (coreop of Re and the index) appears as the FIRST operand of `op`, with
// the accumulator $src and $Rn following. The 128-bit element vector Re is
// used by the instruction directly.
7252 class NI_2VEswap_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
7253 RegisterOperand ResVPR, RegisterOperand OpVPR,
7254 ValueType ResTy, ValueType OpTy,
7255 SDPatternOperator coreop>
7256 : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))),
7257 (ResTy ResVPR:$src), (ResTy ResVPR:$Rn))),
7258 (INST ResVPR:$src, ResVPR:$Rn, OpVPR:$Re, OpImm:$Index)>;
7260 // Pattern for lane 0
// fma with a splatted f32 scalar: the FPR32 is inserted into a vector
// register with SUBREG_TO_REG and the by-element instruction reads lane 0.
7261 class NI_2VEfma_lane0<Instruction INST, SDPatternOperator op,
7262 RegisterOperand ResVPR, ValueType ResTy>
7263 : Pat<(ResTy (op (ResTy ResVPR:$Rn),
7264 (ResTy (Neon_vdup (f32 FPR32:$Re))),
7265 (ResTy ResVPR:$src))),
7266 (INST ResVPR:$src, ResVPR:$Rn,
7267 (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>;
7269 // Pattern for lane in 64-bit vector
// Same shape as NI_2VEswap_laneq, but the element vector Re is 64-bit and
// must be widened to a 128-bit register with SUBREG_TO_REG before the
// by-element instruction can use it.
7270 class NI_2VEswap_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
7271 RegisterOperand ResVPR, RegisterOperand OpVPR,
7272 ValueType ResTy, ValueType OpTy,
7273 SDPatternOperator coreop>
7274 : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))),
7275 (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))),
7276 (INST ResVPR:$src, ResVPR:$Rn,
7277 (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), OpImm:$Index)>;
7279 // Pattern for lane in 64-bit vector
// 2d/2d special case: coreop combines the 64-bit Re with itself (e.g.
// Neon_combine_2d splatting a v1f64 into both lanes), so the lane index is
// fixed at 0; Re is widened with SUBREG_TO_REG as usual.
7280 class NI_2VEswap_lane_2d2d<Instruction INST, Operand OpImm,
7281 SDPatternOperator op,
7282 RegisterOperand ResVPR, RegisterOperand OpVPR,
7283 ValueType ResTy, ValueType OpTy,
7284 SDPatternOperator coreop>
7285 : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (OpTy OpVPR:$Re))),
7286 (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))),
7287 (INST ResVPR:$src, ResVPR:$Rn,
7288 (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), 0)>;
7291 multiclass NI_2VE_fma_v2_pat<string subop, SDPatternOperator op> {
7292 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
7293 neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
7294 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
7296 def : NI_2VEfma_lane0<!cast<Instruction>(subop # "_2s4s"),
7299 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
7300 neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
7301 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
7303 def : NI_2VEfma_lane0<!cast<Instruction>(subop # "_4s4s"),
7306 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
7307 neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
7308 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
7310 // Index can only be half of the max value for lane in 64-bit vector
7312 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
7313 neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
7314 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
7316 def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
7317 neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
7318 BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>;
7321 defm FMLA_lane_v2_s : NI_2VE_fma_v2_pat<"FMLAvve", fma>;
7323 // Pattern for lane 0
// FMLS as fma with a negated Rn and a splatted f32 scalar; the scalar is
// inserted via SUBREG_TO_REG and the instruction reads lane 0.
7324 class NI_2VEfms_lane0<Instruction INST, SDPatternOperator op,
7325 RegisterOperand ResVPR, ValueType ResTy>
7326 : Pat<(ResTy (op (ResTy (fneg ResVPR:$Rn)),
7327 (ResTy (Neon_vdup (f32 FPR32:$Re))),
7328 (ResTy ResVPR:$src))),
7329 (INST ResVPR:$src, ResVPR:$Rn,
7330 (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>;
7332 multiclass NI_2VE_fms_v2_pat<string subop, SDPatternOperator op>
7334 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
7335 neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
7336 BinOpFrag<(fneg (Neon_vduplane node:$LHS, node:$RHS))>>;
7338 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
7339 neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
7340 BinOpFrag<(Neon_vduplane
7341 (fneg node:$LHS), node:$RHS)>>;
7343 def : NI_2VEfms_lane0<!cast<Instruction>(subop # "_2s4s"),
7346 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
7347 neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
7348 BinOpFrag<(fneg (Neon_vduplane
7349 node:$LHS, node:$RHS))>>;
7351 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
7352 neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
7353 BinOpFrag<(Neon_vduplane
7354 (fneg node:$LHS), node:$RHS)>>;
7356 def : NI_2VEfms_lane0<!cast<Instruction>(subop # "_4s4s"),
7359 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
7360 neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
7361 BinOpFrag<(fneg (Neon_vduplane
7362 node:$LHS, node:$RHS))>>;
7364 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
7365 neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
7366 BinOpFrag<(Neon_vduplane
7367 (fneg node:$LHS), node:$RHS)>>;
7369 // Index can only be half of the max value for lane in 64-bit vector
7371 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
7372 neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
7373 BinOpFrag<(fneg (Neon_vduplane
7374 node:$LHS, node:$RHS))>>;
7376 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
7377 neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
7378 BinOpFrag<(Neon_vduplane
7379 (fneg node:$LHS), node:$RHS)>>;
7381 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_4s4s"),
7382 neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32,
7383 BinOpFrag<(fneg (Neon_vduplane node:$LHS, node:$RHS))>>;
7385 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_4s4s"),
7386 neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32,
7387 BinOpFrag<(Neon_vduplane (fneg node:$LHS), node:$RHS)>>;
7389 def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
7390 neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
7391 BinOpFrag<(fneg (Neon_combine_2d
7392 node:$LHS, node:$RHS))>>;
7394 def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
7395 neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
7396 BinOpFrag<(Neon_combine_2d
7397 (fneg node:$LHS), (fneg node:$RHS))>>;
7400 defm FMLS_lane_v2_s : NI_2VE_fms_v2_pat<"FMLSvve", fma>;
7402 // Variant 3: Long type
7403 // E.g. SMLAL : 4S/4H/H (v0-v15), 2D/2S/S
7404 // SMLAL2: 4S/8H/H (v0-v15), 2D/4S/S
7406 multiclass NI_2VE_v3<bit u, bits<4> opcode, string asmop> {
7407 // vector register class for element is always 128-bit to cover the max index
7408 def _2d2s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s",
7409 neon_uimm2_bare, VPR128, VPR64, VPR128> {
7410 let Inst{11} = {Index{1}};
7411 let Inst{21} = {Index{0}};
7412 let Inst{20-16} = Re;
7415 def _2d4s : NI_2VE<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s",
7416 neon_uimm2_bare, VPR128, VPR128, VPR128> {
7417 let Inst{11} = {Index{1}};
7418 let Inst{21} = {Index{0}};
7419 let Inst{20-16} = Re;
7422 // Index operations on 16-bit(H) elements are restricted to using v0-v15.
7423 def _4s8h : NI_2VE<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h",
7424 neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
7425 let Inst{11} = {Index{2}};
7426 let Inst{21} = {Index{1}};
7427 let Inst{20} = {Index{0}};
7428 let Inst{19-16} = Re{3-0};
7431 def _4s4h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h",
7432 neon_uimm3_bare, VPR128, VPR64, VPR128Lo> {
7433 let Inst{11} = {Index{2}};
7434 let Inst{21} = {Index{1}};
7435 let Inst{20} = {Index{0}};
7436 let Inst{19-16} = Re{3-0};
// Long multiply-accumulate by element (and their high-half "2" variants,
// provided by the multiclass).
7440 defm SMLALvve : NI_2VE_v3<0b0, 0b0010, "smlal">;
7441 defm UMLALvve : NI_2VE_v3<0b1, 0b0010, "umlal">;
7442 defm SMLSLvve : NI_2VE_v3<0b0, 0b0110, "smlsl">;
7443 defm UMLSLvve : NI_2VE_v3<0b1, 0b0110, "umlsl">;
7444 defm SQDMLALvve : NI_2VE_v3<0b0, 0b0011, "sqdmlal">;
7445 defm SQDMLSLvve : NI_2VE_v3<0b0, 0b0111, "sqdmlsl">;
7447 multiclass NI_2VE_v3_2op<bit u, bits<4> opcode, string asmop> {
7448 // vector register class for element is always 128-bit to cover the max index
7449 def _2d2s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s",
7450 neon_uimm2_bare, VPR128, VPR64, VPR128> {
7451 let Inst{11} = {Index{1}};
7452 let Inst{21} = {Index{0}};
7453 let Inst{20-16} = Re;
7456 def _2d4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s",
7457 neon_uimm2_bare, VPR128, VPR128, VPR128> {
7458 let Inst{11} = {Index{1}};
7459 let Inst{21} = {Index{0}};
7460 let Inst{20-16} = Re;
7463 // Index operations on 16-bit(H) elements are restricted to using v0-v15.
7464 def _4s8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h",
7465 neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
7466 let Inst{11} = {Index{2}};
7467 let Inst{21} = {Index{1}};
7468 let Inst{20} = {Index{0}};
7469 let Inst{19-16} = Re{3-0};
7472 def _4s4h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h",
7473 neon_uimm3_bare, VPR128, VPR64, VPR128Lo> {
7474 let Inst{11} = {Index{2}};
7475 let Inst{21} = {Index{1}};
7476 let Inst{20} = {Index{0}};
7477 let Inst{19-16} = Re{3-0};
// Long multiply by element (two-operand forms).
7481 defm SMULLve : NI_2VE_v3_2op<0b0, 0b1010, "smull">;
7482 defm UMULLve : NI_2VE_v3_2op<0b1, 0b1010, "umull">;
7483 defm SQDMULLve : NI_2VE_v3_2op<0b0, 0b1011, "sqdmull">;
7485 def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$src))),
7488 // Pattern for lane in 128-bit vector
// Long "2" (high-half) accumulate form: the first multiplicand is the high
// half of Rn extracted by `hiop`; the element vector Re is 128-bit and is
// used by the instruction directly.
7489 class NI_2VEL2_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
7490 RegisterOperand EleOpVPR, ValueType ResTy,
7491 ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
7492 SDPatternOperator hiop>
7493 : Pat<(ResTy (op (ResTy VPR128:$src),
7494 (HalfOpTy (hiop (OpTy VPR128:$Rn))),
7495 (HalfOpTy (Neon_vduplane
7496 (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
7497 (INST VPR128:$src, VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
7499 // Pattern for lane in 64-bit vector
// As NI_2VEL2_laneq, but the element vector Re is 64-bit and is widened to
// 128 bits with SUBREG_TO_REG before use.
7500 class NI_2VEL2_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
7501 RegisterOperand EleOpVPR, ValueType ResTy,
7502 ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
7503 SDPatternOperator hiop>
7504 : Pat<(ResTy (op (ResTy VPR128:$src),
7505 (HalfOpTy (hiop (OpTy VPR128:$Rn))),
7506 (HalfOpTy (Neon_vduplane
7507 (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
7508 (INST VPR128:$src, VPR128:$Rn,
7509 (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
// Lane-0 long "2" accumulate form with a GPR element: the i32 scalar is
// splatted into a vector register by DupInst and the instruction then
// reads lane 0.
7511 class NI_2VEL2_lane0<Instruction INST, SDPatternOperator op,
7512 ValueType ResTy, ValueType OpTy, ValueType HalfOpTy,
7513 SDPatternOperator hiop, Instruction DupInst>
7514 : Pat<(ResTy (op (ResTy VPR128:$src),
7515 (HalfOpTy (hiop (OpTy VPR128:$Rn))),
7516 (HalfOpTy (Neon_vdup (i32 GPR32:$Re))))),
7517 (INST VPR128:$src, VPR128:$Rn, (DupInst $Re), 0)>;
7519 multiclass NI_2VEL_v3_pat<string subop, SDPatternOperator op> {
7520 def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
7521 op, VPR128, VPR64, VPR128Lo, v4i32, v4i16, v8i16>;
7523 def : NI_2VE_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
7524 op, VPR128, VPR64, VPR128, v2i64, v2i32, v4i32>;
7526 def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
7527 op, VPR128Lo, v4i32, v8i16, v8i16, v4i16, Neon_High8H>;
7529 def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
7530 op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S>;
7532 def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_4s8h"),
7533 op, v4i32, v8i16, v4i16, Neon_High8H, DUP8h>;
7535 def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_2d4s"),
7536 op, v2i64, v4i32, v2i32, Neon_High4S, DUP4s>;
7538 // Index can only be half of the max value for lane in 64-bit vector
7540 def : NI_2VE_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
7541 op, VPR128, VPR64, VPR64Lo, v4i32, v4i16, v4i16>;
7543 def : NI_2VE_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
7544 op, VPR128, VPR64, VPR64, v2i64, v2i32, v2i32>;
7546 def : NI_2VEL2_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
7547 op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H>;
7549 def : NI_2VEL2_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
7550 op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S>;
// Lane-indexed selection patterns for the long multiply-accumulate
// instructions.
7553 defm SMLAL_lane_v3 : NI_2VEL_v3_pat<"SMLALvve", Neon_smlal>;
7554 defm UMLAL_lane_v3 : NI_2VEL_v3_pat<"UMLALvve", Neon_umlal>;
7555 defm SMLSL_lane_v3 : NI_2VEL_v3_pat<"SMLSLvve", Neon_smlsl>;
7556 defm UMLSL_lane_v3 : NI_2VEL_v3_pat<"UMLSLvve", Neon_umlsl>;
7558 // Pattern for lane in 128-bit vector
7559 class NI_2VEL2_mul_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
7560 RegisterOperand EleOpVPR, ValueType ResTy,
7561 ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
7562 SDPatternOperator hiop>
7564 (HalfOpTy (hiop (OpTy VPR128:$Rn))),
7565 (HalfOpTy (Neon_vduplane
7566 (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
7567 (INST VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
7569 // Pattern for lane in 64-bit vector
7570 class NI_2VEL2_mul_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
7571 RegisterOperand EleOpVPR, ValueType ResTy,
7572 ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
7573 SDPatternOperator hiop>
7575 (HalfOpTy (hiop (OpTy VPR128:$Rn))),
7576 (HalfOpTy (Neon_vduplane
7577 (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
7579 (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
7581 // Pattern for fixed lane 0
7582 class NI_2VEL2_mul_lane0<Instruction INST, SDPatternOperator op,
7583 ValueType ResTy, ValueType OpTy, ValueType HalfOpTy,
7584 SDPatternOperator hiop, Instruction DupInst>
7586 (HalfOpTy (hiop (OpTy VPR128:$Rn))),
7587 (HalfOpTy (Neon_vdup (i32 GPR32:$Re))))),
7588 (INST VPR128:$Rn, (DupInst $Re), 0)>;
7590 multiclass NI_2VEL_mul_v3_pat<string subop, SDPatternOperator op> {
7591 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
7592 op, VPR64, VPR128Lo, v4i32, v4i16, v8i16>;
7594 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
7595 op, VPR64, VPR128, v2i64, v2i32, v4i32>;
7597 def : NI_2VEL2_mul_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
7598 op, VPR128Lo, v4i32, v8i16, v8i16, v4i16, Neon_High8H>;
7600 def : NI_2VEL2_mul_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
7601 op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S>;
7603 def : NI_2VEL2_mul_lane0<!cast<Instruction>(subop # "_4s8h"),
7604 op, v4i32, v8i16, v4i16, Neon_High8H, DUP8h>;
7606 def : NI_2VEL2_mul_lane0<!cast<Instruction>(subop # "_2d4s"),
7607 op, v2i64, v4i32, v2i32, Neon_High4S, DUP4s>;
7609 // Index can only be half of the max value for lane in 64-bit vector
7611 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
7612 op, VPR64, VPR64Lo, v4i32, v4i16, v4i16>;
7614 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
7615 op, VPR64, VPR64, v2i64, v2i32, v2i32>;
7617 def : NI_2VEL2_mul_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
7618 op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H>;
7620 def : NI_2VEL2_mul_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
7621 op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S>;
// Lane-indexed selection patterns for SMULL/UMULL/SQDMULL.
7624 defm SMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SMULLve", int_arm_neon_vmulls>;
7625 defm UMULL_lane_v3 : NI_2VEL_mul_v3_pat<"UMULLve", int_arm_neon_vmullu>;
7626 defm SQDMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SQDMULLve", int_arm_neon_vqdmull>;
7628 multiclass NI_qdma<SDPatternOperator op> {
7629 def _4s : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
7631 (v4i32 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>;
7633 def _2d : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
7635 (v2i64 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>;
// SQDMLAL/SQDMLSL fragments: vqdmull followed by a saturating add/subtract.
7638 defm Neon_qdmlal : NI_qdma<int_arm_neon_vqadds>;
7639 defm Neon_qdmlsl : NI_qdma<int_arm_neon_vqsubs>;
7641 multiclass NI_2VEL_v3_qdma_pat<string subop, string op> {
7642 def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
7643 !cast<PatFrag>(op # "_4s"), VPR128, VPR64, VPR128Lo,
7644 v4i32, v4i16, v8i16>;
7646 def : NI_2VE_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
7647 !cast<PatFrag>(op # "_2d"), VPR128, VPR64, VPR128,
7648 v2i64, v2i32, v4i32>;
7650 def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
7651 !cast<PatFrag>(op # "_4s"), VPR128Lo,
7652 v4i32, v8i16, v8i16, v4i16, Neon_High8H>;
7654 def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
7655 !cast<PatFrag>(op # "_2d"), VPR128,
7656 v2i64, v4i32, v4i32, v2i32, Neon_High4S>;
7658 def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_4s8h"),
7659 !cast<PatFrag>(op # "_4s"),
7660 v4i32, v8i16, v4i16, Neon_High8H, DUP8h>;
7662 def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_2d4s"),
7663 !cast<PatFrag>(op # "_2d"),
7664 v2i64, v4i32, v2i32, Neon_High4S, DUP4s>;
7666 // Index can only be half of the max value for lane in 64-bit vector
7668 def : NI_2VE_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
7669 !cast<PatFrag>(op # "_4s"), VPR128, VPR64, VPR64Lo,
7670 v4i32, v4i16, v4i16>;
7672 def : NI_2VE_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
7673 !cast<PatFrag>(op # "_2d"), VPR128, VPR64, VPR64,
7674 v2i64, v2i32, v2i32>;
7676 def : NI_2VEL2_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
7677 !cast<PatFrag>(op # "_4s"), VPR64Lo,
7678 v4i32, v8i16, v4i16, v4i16, Neon_High8H>;
7680 def : NI_2VEL2_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
7681 !cast<PatFrag>(op # "_2d"), VPR64,
7682 v2i64, v4i32, v2i32, v2i32, Neon_High4S>;
// Lane-indexed selection patterns for SQDMLAL/SQDMLSL.
7685 defm SQDMLAL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLALvve", "Neon_qdmlal">;
7686 defm SQDMLSL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLSLvve", "Neon_qdmlsl">;
7688 // End of implementation for instruction class (3V Elem)
7690 class NeonI_REV<string asmop, string Res, bits<2> size, bit Q, bit U,
7691 bits<5> opcode, RegisterOperand ResVPR, ValueType ResTy,
7692 SDPatternOperator Neon_Rev>
7693 : NeonI_2VMisc<Q, U, size, opcode,
7694 (outs ResVPR:$Rd), (ins ResVPR:$Rn),
7695 asmop # "\t$Rd." # Res # ", $Rn." # Res,
7696 [(set (ResTy ResVPR:$Rd),
7697 (ResTy (Neon_Rev (ResTy ResVPR:$Rn))))],
7700 def REV64_16b : NeonI_REV<"rev64", "16b", 0b00, 0b1, 0b0, 0b00000, VPR128,
7702 def REV64_8h : NeonI_REV<"rev64", "8h", 0b01, 0b1, 0b0, 0b00000, VPR128,
7704 def REV64_4s : NeonI_REV<"rev64", "4s", 0b10, 0b1, 0b0, 0b00000, VPR128,
7706 def REV64_8b : NeonI_REV<"rev64", "8b", 0b00, 0b0, 0b0, 0b00000, VPR64,
7708 def REV64_4h : NeonI_REV<"rev64", "4h", 0b01, 0b0, 0b0, 0b00000, VPR64,
7710 def REV64_2s : NeonI_REV<"rev64", "2s", 0b10, 0b0, 0b0, 0b00000, VPR64,
// Floating-point rev64 reuses the REV64 instructions defined above
// (presumably a pure lane shuffle, so element interpretation is
// irrelevant — the integer-typed instructions serve both).
7713 def : Pat<(v4f32 (Neon_rev64 (v4f32 VPR128:$Rn))), (REV64_4s VPR128:$Rn)>;
7714 def : Pat<(v2f32 (Neon_rev64 (v2f32 VPR64:$Rn))), (REV64_2s VPR64:$Rn)>;
7716 def REV32_16b : NeonI_REV<"rev32", "16b", 0b00, 0b1, 0b1, 0b00000, VPR128,
7718 def REV32_8h : NeonI_REV<"rev32", "8h", 0b01, 0b1, 0b1, 0b00000, VPR128,
7720 def REV32_8b : NeonI_REV<"rev32", "8b", 0b00, 0b0, 0b1, 0b00000, VPR64,
7722 def REV32_4h : NeonI_REV<"rev32", "4h", 0b01, 0b0, 0b1, 0b00000, VPR64,
7725 def REV16_16b : NeonI_REV<"rev16", "16b", 0b00, 0b1, 0b0, 0b00001, VPR128,
7727 def REV16_8b : NeonI_REV<"rev16", "8b", 0b00, 0b0, 0b0, 0b00001, VPR64,
7730 multiclass NeonI_PairwiseAdd<string asmop, bit U, bits<5> opcode,
7731 SDPatternOperator Neon_Padd> {
7732 def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
7733 (outs VPR128:$Rd), (ins VPR128:$Rn),
7734 asmop # "\t$Rd.8h, $Rn.16b",
7735 [(set (v8i16 VPR128:$Rd),
7736 (v8i16 (Neon_Padd (v16i8 VPR128:$Rn))))],
7739 def 8b4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
7740 (outs VPR64:$Rd), (ins VPR64:$Rn),
7741 asmop # "\t$Rd.4h, $Rn.8b",
7742 [(set (v4i16 VPR64:$Rd),
7743 (v4i16 (Neon_Padd (v8i8 VPR64:$Rn))))],
7746 def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
7747 (outs VPR128:$Rd), (ins VPR128:$Rn),
7748 asmop # "\t$Rd.4s, $Rn.8h",
7749 [(set (v4i32 VPR128:$Rd),
7750 (v4i32 (Neon_Padd (v8i16 VPR128:$Rn))))],
7753 def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
7754 (outs VPR64:$Rd), (ins VPR64:$Rn),
7755 asmop # "\t$Rd.2s, $Rn.4h",
7756 [(set (v2i32 VPR64:$Rd),
7757 (v2i32 (Neon_Padd (v4i16 VPR64:$Rn))))],
7760 def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
7761 (outs VPR128:$Rd), (ins VPR128:$Rn),
7762 asmop # "\t$Rd.2d, $Rn.4s",
7763 [(set (v2i64 VPR128:$Rd),
7764 (v2i64 (Neon_Padd (v4i32 VPR128:$Rn))))],
7767 def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode,
7768 (outs VPR64:$Rd), (ins VPR64:$Rn),
7769 asmop # "\t$Rd.1d, $Rn.2s",
7770 [(set (v1i64 VPR64:$Rd),
7771 (v1i64 (Neon_Padd (v2i32 VPR64:$Rn))))],
// Signed/unsigned pairwise add-long: adjacent element pairs are added
// and the result is widened (16b->8h, 8b->4h, ...). Selection goes
// through the ARM-shared vpaddls/vpaddlu intrinsics.
7775 defm SADDLP : NeonI_PairwiseAdd<"saddlp", 0b0, 0b00010,
7776 int_arm_neon_vpaddls>;
7777 defm UADDLP : NeonI_PairwiseAdd<"uaddlp", 0b1, 0b00010,
7778 int_arm_neon_vpaddlu>;
7780 def : Pat<(v1i64 (int_aarch64_neon_saddlv (v2i32 VPR64:$Rn))),
7782 def : Pat<(v1i64 (int_aarch64_neon_uaddlv (v2i32 VPR64:$Rn))),
7785 multiclass NeonI_PairwiseAddAcc<string asmop, bit U, bits<5> opcode,
7786 SDPatternOperator Neon_Padd> {
7787 let Constraints = "$src = $Rd" in {
7788 def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
7789 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
7790 asmop # "\t$Rd.8h, $Rn.16b",
7791 [(set (v8i16 VPR128:$Rd),
7793 (v8i16 VPR128:$src), (v16i8 VPR128:$Rn))))],
7796 def 8b4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
7797 (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
7798 asmop # "\t$Rd.4h, $Rn.8b",
7799 [(set (v4i16 VPR64:$Rd),
7801 (v4i16 VPR64:$src), (v8i8 VPR64:$Rn))))],
7804 def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
7805 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
7806 asmop # "\t$Rd.4s, $Rn.8h",
7807 [(set (v4i32 VPR128:$Rd),
7809 (v4i32 VPR128:$src), (v8i16 VPR128:$Rn))))],
7812 def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
7813 (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
7814 asmop # "\t$Rd.2s, $Rn.4h",
7815 [(set (v2i32 VPR64:$Rd),
7817 (v2i32 VPR64:$src), (v4i16 VPR64:$Rn))))],
7820 def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
7821 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
7822 asmop # "\t$Rd.2d, $Rn.4s",
7823 [(set (v2i64 VPR128:$Rd),
7825 (v2i64 VPR128:$src), (v4i32 VPR128:$Rn))))],
7828 def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode,
7829 (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
7830 asmop # "\t$Rd.1d, $Rn.2s",
7831 [(set (v1i64 VPR64:$Rd),
7833 (v1i64 VPR64:$src), (v2i32 VPR64:$Rn))))],
7838 defm SADALP : NeonI_PairwiseAddAcc<"sadalp", 0b0, 0b00110,
7839 int_arm_neon_vpadals>;
7840 defm UADALP : NeonI_PairwiseAddAcc<"uadalp", 0b1, 0b00110,
7841 int_arm_neon_vpadalu>;
7843 multiclass NeonI_2VMisc_BHSDsize_1Arg<string asmop, bit U, bits<5> opcode> {
7844 def 16b : NeonI_2VMisc<0b1, U, 0b00, opcode,
7845 (outs VPR128:$Rd), (ins VPR128:$Rn),
7846 asmop # "\t$Rd.16b, $Rn.16b",
7849 def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode,
7850 (outs VPR128:$Rd), (ins VPR128:$Rn),
7851 asmop # "\t$Rd.8h, $Rn.8h",
7854 def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
7855 (outs VPR128:$Rd), (ins VPR128:$Rn),
7856 asmop # "\t$Rd.4s, $Rn.4s",
7859 def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode,
7860 (outs VPR128:$Rd), (ins VPR128:$Rn),
7861 asmop # "\t$Rd.2d, $Rn.2d",
7864 def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode,
7865 (outs VPR64:$Rd), (ins VPR64:$Rn),
7866 asmop # "\t$Rd.8b, $Rn.8b",
7869 def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode,
7870 (outs VPR64:$Rd), (ins VPR64:$Rn),
7871 asmop # "\t$Rd.4h, $Rn.4h",
7874 def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
7875 (outs VPR64:$Rd), (ins VPR64:$Rn),
7876 asmop # "\t$Rd.2s, $Rn.2s",
// Saturating and plain absolute-value/negate over all B/H/S/D vector
// arrangements. Each pair shares an opcode; the U bit distinguishes
// the abs form (0) from the neg form (1).
7880 defm SQABS : NeonI_2VMisc_BHSDsize_1Arg<"sqabs", 0b0, 0b00111>;
7881 defm SQNEG : NeonI_2VMisc_BHSDsize_1Arg<"sqneg", 0b1, 0b00111>;
7882 defm ABS : NeonI_2VMisc_BHSDsize_1Arg<"abs", 0b0, 0b01011>;
7883 defm NEG : NeonI_2VMisc_BHSDsize_1Arg<"neg", 0b1, 0b01011>;
7885 multiclass NeonI_2VMisc_BHSD_1Arg_Pattern<string Prefix,
7886 SDPatternOperator Neon_Op> {
7887 def : Pat<(v16i8 (Neon_Op (v16i8 VPR128:$Rn))),
7888 (v16i8 (!cast<Instruction>(Prefix # 16b) (v16i8 VPR128:$Rn)))>;
7890 def : Pat<(v8i16 (Neon_Op (v8i16 VPR128:$Rn))),
7891 (v8i16 (!cast<Instruction>(Prefix # 8h) (v8i16 VPR128:$Rn)))>;
7893 def : Pat<(v4i32 (Neon_Op (v4i32 VPR128:$Rn))),
7894 (v4i32 (!cast<Instruction>(Prefix # 4s) (v4i32 VPR128:$Rn)))>;
7896 def : Pat<(v2i64 (Neon_Op (v2i64 VPR128:$Rn))),
7897 (v2i64 (!cast<Instruction>(Prefix # 2d) (v2i64 VPR128:$Rn)))>;
7899 def : Pat<(v8i8 (Neon_Op (v8i8 VPR64:$Rn))),
7900 (v8i8 (!cast<Instruction>(Prefix # 8b) (v8i8 VPR64:$Rn)))>;
7902 def : Pat<(v4i16 (Neon_Op (v4i16 VPR64:$Rn))),
7903 (v4i16 (!cast<Instruction>(Prefix # 4h) (v4i16 VPR64:$Rn)))>;
7905 def : Pat<(v2i32 (Neon_Op (v2i32 VPR64:$Rn))),
7906 (v2i32 (!cast<Instruction>(Prefix # 2s) (v2i32 VPR64:$Rn)))>;
// ISel pattern hookup: map the ARM-shared vqabs/vqneg/vabs intrinsics
// onto the SQABS/SQNEG/ABS instructions defined above (NEG is handled
// by the explicit sub-from-zero patterns below instead).
7909 defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"SQABS", int_arm_neon_vqabs>;
7910 defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"SQNEG", int_arm_neon_vqneg>;
7911 defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"ABS", int_arm_neon_vabs>;
// Select NEG for an integer subtraction from the all-zeros vector:
// 0 - x == -x.
7913 def : Pat<(v16i8 (sub
7914 (v16i8 Neon_AllZero),
7915 (v16i8 VPR128:$Rn))),
7916 (v16i8 (NEG16b (v16i8 VPR128:$Rn)))>;
7917 def : Pat<(v8i8 (sub
7918 (v8i8 Neon_AllZero),
7920 (v8i8 (NEG8b (v8i8 VPR64:$Rn)))>;
// Same 0 - x == -x selection for the wider element types. The zero
// vector reaches ISel as a bitconvert of the all-zeros byte vector,
// so each pattern matches that form and picks the matching NEG size.
7921 def : Pat<(v8i16 (sub
7922 (v8i16 (bitconvert (v16i8 Neon_AllZero))),
7923 (v8i16 VPR128:$Rn))),
7924 (v8i16 (NEG8h (v8i16 VPR128:$Rn)))>;
7925 def : Pat<(v4i16 (sub
7926 (v4i16 (bitconvert (v8i8 Neon_AllZero))),
7927 (v4i16 VPR64:$Rn))),
7928 (v4i16 (NEG4h (v4i16 VPR64:$Rn)))>;
7929 def : Pat<(v4i32 (sub
7930 (v4i32 (bitconvert (v16i8 Neon_AllZero))),
7931 (v4i32 VPR128:$Rn))),
7932 (v4i32 (NEG4s (v4i32 VPR128:$Rn)))>;
7933 def : Pat<(v2i32 (sub
7934 (v2i32 (bitconvert (v8i8 Neon_AllZero))),
7935 (v2i32 VPR64:$Rn))),
7936 (v2i32 (NEG2s (v2i32 VPR64:$Rn)))>;
7937 def : Pat<(v2i64 (sub
7938 (v2i64 (bitconvert (v16i8 Neon_AllZero))),
7939 (v2i64 VPR128:$Rn))),
7940 (v2i64 (NEG2d (v2i64 VPR128:$Rn)))>;
7942 multiclass NeonI_2VMisc_BHSDsize_2Args<string asmop, bit U, bits<5> opcode> {
7943 let Constraints = "$src = $Rd" in {
7944 def 16b : NeonI_2VMisc<0b1, U, 0b00, opcode,
7945 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
7946 asmop # "\t$Rd.16b, $Rn.16b",
7949 def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode,
7950 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
7951 asmop # "\t$Rd.8h, $Rn.8h",
7954 def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
7955 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
7956 asmop # "\t$Rd.4s, $Rn.4s",
7959 def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode,
7960 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
7961 asmop # "\t$Rd.2d, $Rn.2d",
7964 def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode,
7965 (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
7966 asmop # "\t$Rd.8b, $Rn.8b",
7969 def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode,
7970 (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
7971 asmop # "\t$Rd.4h, $Rn.4h",
7974 def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
7975 (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
7976 asmop # "\t$Rd.2s, $Rn.2s",
// suqadd/usqadd: saturating accumulate instructions. The multiclass
// above ties $src to $Rd, so the destination is read-modify-write.
7981 defm SUQADD : NeonI_2VMisc_BHSDsize_2Args<"suqadd", 0b0, 0b00011>;
7982 defm USQADD : NeonI_2VMisc_BHSDsize_2Args<"usqadd", 0b1, 0b00011>;
7984 multiclass NeonI_2VMisc_BHSD_2Args_Pattern<string Prefix,
7985 SDPatternOperator Neon_Op> {
7986 def : Pat<(v16i8 (Neon_Op (v16i8 VPR128:$src), (v16i8 VPR128:$Rn))),
7987 (v16i8 (!cast<Instruction>(Prefix # 16b)
7988 (v16i8 VPR128:$src), (v16i8 VPR128:$Rn)))>;
7990 def : Pat<(v8i16 (Neon_Op (v8i16 VPR128:$src), (v8i16 VPR128:$Rn))),
7991 (v8i16 (!cast<Instruction>(Prefix # 8h)
7992 (v8i16 VPR128:$src), (v8i16 VPR128:$Rn)))>;
7994 def : Pat<(v4i32 (Neon_Op (v4i32 VPR128:$src), (v4i32 VPR128:$Rn))),
7995 (v4i32 (!cast<Instruction>(Prefix # 4s)
7996 (v4i32 VPR128:$src), (v4i32 VPR128:$Rn)))>;
7998 def : Pat<(v2i64 (Neon_Op (v2i64 VPR128:$src), (v2i64 VPR128:$Rn))),
7999 (v2i64 (!cast<Instruction>(Prefix # 2d)
8000 (v2i64 VPR128:$src), (v2i64 VPR128:$Rn)))>;
8002 def : Pat<(v8i8 (Neon_Op (v8i8 VPR64:$src), (v8i8 VPR64:$Rn))),
8003 (v8i8 (!cast<Instruction>(Prefix # 8b)
8004 (v8i8 VPR64:$src), (v8i8 VPR64:$Rn)))>;
8006 def : Pat<(v4i16 (Neon_Op (v4i16 VPR64:$src), (v4i16 VPR64:$Rn))),
8007 (v4i16 (!cast<Instruction>(Prefix # 4h)
8008 (v4i16 VPR64:$src), (v4i16 VPR64:$Rn)))>;
8010 def : Pat<(v2i32 (Neon_Op (v2i32 VPR64:$src), (v2i32 VPR64:$Rn))),
8011 (v2i32 (!cast<Instruction>(Prefix # 2s)
8012 (v2i32 VPR64:$src), (v2i32 VPR64:$Rn)))>;
// Map the AArch64 suqadd/usqadd intrinsics onto the accumulating
// instructions defined above.
8015 defm : NeonI_2VMisc_BHSD_2Args_Pattern<"SUQADD", int_aarch64_neon_suqadd>;
8016 defm : NeonI_2VMisc_BHSD_2Args_Pattern<"USQADD", int_aarch64_neon_usqadd>;
8018 multiclass NeonI_2VMisc_BHSsizes<string asmop, bit U,
8019 SDPatternOperator Neon_Op> {
8020 def 16b : NeonI_2VMisc<0b1, U, 0b00, 0b00100,
8021 (outs VPR128:$Rd), (ins VPR128:$Rn),
8022 asmop # "\t$Rd.16b, $Rn.16b",
8023 [(set (v16i8 VPR128:$Rd),
8024 (v16i8 (Neon_Op (v16i8 VPR128:$Rn))))],
8027 def 8h : NeonI_2VMisc<0b1, U, 0b01, 0b00100,
8028 (outs VPR128:$Rd), (ins VPR128:$Rn),
8029 asmop # "\t$Rd.8h, $Rn.8h",
8030 [(set (v8i16 VPR128:$Rd),
8031 (v8i16 (Neon_Op (v8i16 VPR128:$Rn))))],
8034 def 4s : NeonI_2VMisc<0b1, U, 0b10, 0b00100,
8035 (outs VPR128:$Rd), (ins VPR128:$Rn),
8036 asmop # "\t$Rd.4s, $Rn.4s",
8037 [(set (v4i32 VPR128:$Rd),
8038 (v4i32 (Neon_Op (v4i32 VPR128:$Rn))))],
8041 def 8b : NeonI_2VMisc<0b0, U, 0b00, 0b00100,
8042 (outs VPR64:$Rd), (ins VPR64:$Rn),
8043 asmop # "\t$Rd.8b, $Rn.8b",
8044 [(set (v8i8 VPR64:$Rd),
8045 (v8i8 (Neon_Op (v8i8 VPR64:$Rn))))],
8048 def 4h : NeonI_2VMisc<0b0, U, 0b01, 0b00100,
8049 (outs VPR64:$Rd), (ins VPR64:$Rn),
8050 asmop # "\t$Rd.4h, $Rn.4h",
8051 [(set (v4i16 VPR64:$Rd),
8052 (v4i16 (Neon_Op (v4i16 VPR64:$Rn))))],
8055 def 2s : NeonI_2VMisc<0b0, U, 0b10, 0b00100,
8056 (outs VPR64:$Rd), (ins VPR64:$Rn),
8057 asmop # "\t$Rd.2s, $Rn.2s",
8058 [(set (v2i32 VPR64:$Rd),
8059 (v2i32 (Neon_Op (v2i32 VPR64:$Rn))))],
// cls selects via the ARM-shared vcls intrinsic; clz maps straight
// onto the generic ctlz node. B/H/S arrangements only (no D form).
8063 defm CLS : NeonI_2VMisc_BHSsizes<"cls", 0b0, int_arm_neon_vcls>;
8064 defm CLZ : NeonI_2VMisc_BHSsizes<"clz", 0b1, ctlz>;
8066 multiclass NeonI_2VMisc_Bsize<string asmop, bit U, bits<2> size,
8068 def 16b : NeonI_2VMisc<0b1, U, size, Opcode,
8069 (outs VPR128:$Rd), (ins VPR128:$Rn),
8070 asmop # "\t$Rd.16b, $Rn.16b",
8073 def 8b : NeonI_2VMisc<0b0, U, size, Opcode,
8074 (outs VPR64:$Rd), (ins VPR64:$Rn),
8075 asmop # "\t$Rd.8b, $Rn.8b",
// Byte-only two-register misc ops. NOT and RBIT share opcode 0b00101
// with U=1 and are distinguished by the size field (0b00 vs 0b01).
8079 defm CNT : NeonI_2VMisc_Bsize<"cnt", 0b0, 0b00, 0b00101>;
8080 defm NOT : NeonI_2VMisc_Bsize<"not", 0b1, 0b00, 0b00101>;
8081 defm RBIT : NeonI_2VMisc_Bsize<"rbit", 0b1, 0b01, 0b00101>;
// Assembler accepts "mvn" as an alias for NOT; the trailing 0 marks
// the alias as parse-only (never used for printing).
8083 def : NeonInstAlias<"mvn $Rd.16b, $Rn.16b",
8084 (NOT16b VPR128:$Rd, VPR128:$Rn), 0>;
8085 def : NeonInstAlias<"mvn $Rd.8b, $Rn.8b",
8086 (NOT8b VPR64:$Rd, VPR64:$Rn), 0>;
// The generic population-count node lowers to CNT.
8088 def : Pat<(v16i8 (ctpop (v16i8 VPR128:$Rn))),
8089 (v16i8 (CNT16b (v16i8 VPR128:$Rn)))>;
8090 def : Pat<(v8i8 (ctpop (v8i8 VPR64:$Rn))),
8091 (v8i8 (CNT8b (v8i8 VPR64:$Rn)))>;
8093 def : Pat<(v16i8 (xor
8095 (v16i8 Neon_AllOne))),
8096 (v16i8 (NOT16b (v16i8 VPR128:$Rn)))>;
8097 def : Pat<(v8i8 (xor
8099 (v8i8 Neon_AllOne))),
8100 (v8i8 (NOT8b (v8i8 VPR64:$Rn)))>;
8101 def : Pat<(v8i16 (xor
8103 (v8i16 (bitconvert (v16i8 Neon_AllOne))))),
8104 (NOT16b VPR128:$Rn)>;
8105 def : Pat<(v4i16 (xor
8107 (v4i16 (bitconvert (v8i8 Neon_AllOne))))),
8109 def : Pat<(v4i32 (xor
8111 (v4i32 (bitconvert (v16i8 Neon_AllOne))))),
8112 (NOT16b VPR128:$Rn)>;
8113 def : Pat<(v2i32 (xor
8115 (v2i32 (bitconvert (v8i8 Neon_AllOne))))),
8117 def : Pat<(v2i64 (xor
8119 (v2i64 (bitconvert (v16i8 Neon_AllOne))))),
8120 (NOT16b VPR128:$Rn)>;
// Bit-reversal intrinsic selects RBIT on both vector widths.
8122 def : Pat<(v16i8 (int_aarch64_neon_rbit (v16i8 VPR128:$Rn))),
8123 (v16i8 (RBIT16b (v16i8 VPR128:$Rn)))>;
8124 def : Pat<(v8i8 (int_aarch64_neon_rbit (v8i8 VPR64:$Rn))),
8125 (v8i8 (RBIT8b (v8i8 VPR64:$Rn)))>;
8127 multiclass NeonI_2VMisc_SDsizes<string asmop, bit U, bits<5> opcode,
8128 SDPatternOperator Neon_Op> {
8129 def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
8130 (outs VPR128:$Rd), (ins VPR128:$Rn),
8131 asmop # "\t$Rd.4s, $Rn.4s",
8132 [(set (v4f32 VPR128:$Rd),
8133 (v4f32 (Neon_Op (v4f32 VPR128:$Rn))))],
8136 def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode,
8137 (outs VPR128:$Rd), (ins VPR128:$Rn),
8138 asmop # "\t$Rd.2d, $Rn.2d",
8139 [(set (v2f64 VPR128:$Rd),
8140 (v2f64 (Neon_Op (v2f64 VPR128:$Rn))))],
8143 def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
8144 (outs VPR64:$Rd), (ins VPR64:$Rn),
8145 asmop # "\t$Rd.2s, $Rn.2s",
8146 [(set (v2f32 VPR64:$Rd),
8147 (v2f32 (Neon_Op (v2f32 VPR64:$Rn))))],
// Vector FP absolute value / negate over 4s/2d/2s, selected from the
// generic fabs/fneg nodes.
8151 defm FABS : NeonI_2VMisc_SDsizes<"fabs", 0b0, 0b01111, fabs>;
8152 defm FNEG : NeonI_2VMisc_SDsizes<"fneg", 0b1, 0b01111, fneg>;
8154 multiclass NeonI_2VMisc_HSD_Narrow<string asmop, bit U, bits<5> opcode> {
8155 def 8h8b : NeonI_2VMisc<0b0, U, 0b00, opcode,
8156 (outs VPR64:$Rd), (ins VPR128:$Rn),
8157 asmop # "\t$Rd.8b, $Rn.8h",
8160 def 4s4h : NeonI_2VMisc<0b0, U, 0b01, opcode,
8161 (outs VPR64:$Rd), (ins VPR128:$Rn),
8162 asmop # "\t$Rd.4h, $Rn.4s",
8165 def 2d2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
8166 (outs VPR64:$Rd), (ins VPR128:$Rn),
8167 asmop # "\t$Rd.2s, $Rn.2d",
8170 let Constraints = "$Rd = $src" in {
8171 def 8h16b : NeonI_2VMisc<0b1, U, 0b00, opcode,
8172 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
8173 asmop # "2\t$Rd.16b, $Rn.8h",
8176 def 4s8h : NeonI_2VMisc<0b1, U, 0b01, opcode,
8177 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
8178 asmop # "2\t$Rd.8h, $Rn.4s",
8181 def 2d4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
8182 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
8183 asmop # "2\t$Rd.4s, $Rn.2d",
// Narrowing moves: plain extract-narrow (xtn) and the saturating
// variants. The "2" forms in the multiclass tie $Rd to $src and write
// the high half of the destination.
8188 defm XTN : NeonI_2VMisc_HSD_Narrow<"xtn", 0b0, 0b10010>;
8189 defm SQXTUN : NeonI_2VMisc_HSD_Narrow<"sqxtun", 0b1, 0b10010>;
8190 defm SQXTN : NeonI_2VMisc_HSD_Narrow<"sqxtn", 0b0, 0b10100>;
8191 defm UQXTN : NeonI_2VMisc_HSD_Narrow<"uqxtn", 0b1, 0b10100>;
8193 multiclass NeonI_2VMisc_Narrow_Patterns<string Prefix,
8194 SDPatternOperator Neon_Op> {
8195 def : Pat<(v8i8 (Neon_Op (v8i16 VPR128:$Rn))),
8196 (v8i8 (!cast<Instruction>(Prefix # 8h8b) (v8i16 VPR128:$Rn)))>;
8198 def : Pat<(v4i16 (Neon_Op (v4i32 VPR128:$Rn))),
8199 (v4i16 (!cast<Instruction>(Prefix # 4s4h) (v4i32 VPR128:$Rn)))>;
8201 def : Pat<(v2i32 (Neon_Op (v2i64 VPR128:$Rn))),
8202 (v2i32 (!cast<Instruction>(Prefix # 2d2s) (v2i64 VPR128:$Rn)))>;
8204 def : Pat<(v16i8 (concat_vectors
8206 (v8i8 (Neon_Op (v8i16 VPR128:$Rn))))),
8207 (!cast<Instruction>(Prefix # 8h16b)
8208 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64),
8211 def : Pat<(v8i16 (concat_vectors
8213 (v4i16 (Neon_Op (v4i32 VPR128:$Rn))))),
8214 (!cast<Instruction>(Prefix # 4s8h)
8215 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64),
8218 def : Pat<(v4i32 (concat_vectors
8220 (v2i32 (Neon_Op (v2i64 VPR128:$Rn))))),
8221 (!cast<Instruction>(Prefix # 2d4s)
8222 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64),
// Pattern hookup: generic trunc selects XTN; the saturating narrows
// go through the ARM-shared vqmovn* intrinsics.
8226 defm : NeonI_2VMisc_Narrow_Patterns<"XTN", trunc>;
8227 defm : NeonI_2VMisc_Narrow_Patterns<"SQXTUN", int_arm_neon_vqmovnsu>;
8228 defm : NeonI_2VMisc_Narrow_Patterns<"SQXTN", int_arm_neon_vqmovns>;
8229 defm : NeonI_2VMisc_Narrow_Patterns<"UQXTN", int_arm_neon_vqmovnu>;
8231 multiclass NeonI_2VMisc_SHIFT<string asmop, bit U, bits<5> opcode> {
8232 let DecoderMethod = "DecodeSHLLInstruction" in {
8233 def 8b8h : NeonI_2VMisc<0b0, U, 0b00, opcode,
8235 (ins VPR64:$Rn, uimm_exact8:$Imm),
8236 asmop # "\t$Rd.8h, $Rn.8b, $Imm",
8239 def 4h4s : NeonI_2VMisc<0b0, U, 0b01, opcode,
8241 (ins VPR64:$Rn, uimm_exact16:$Imm),
8242 asmop # "\t$Rd.4s, $Rn.4h, $Imm",
8245 def 2s2d : NeonI_2VMisc<0b0, U, 0b10, opcode,
8247 (ins VPR64:$Rn, uimm_exact32:$Imm),
8248 asmop # "\t$Rd.2d, $Rn.2s, $Imm",
8251 def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
8253 (ins VPR128:$Rn, uimm_exact8:$Imm),
8254 asmop # "2\t$Rd.8h, $Rn.16b, $Imm",
8257 def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
8259 (ins VPR128:$Rn, uimm_exact16:$Imm),
8260 asmop # "2\t$Rd.4s, $Rn.8h, $Imm",
8263 def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
8265 (ins VPR128:$Rn, uimm_exact32:$Imm),
8266 asmop # "2\t$Rd.2d, $Rn.4s, $Imm",
// shll/shll2: shift-left-long with an immediate equal to the source
// element width (uimm_exact*); uses the dedicated DecodeSHLLInstruction
// decoder set in the multiclass above.
8271 defm SHLL : NeonI_2VMisc_SHIFT<"shll", 0b1, 0b10011>;
8273 class NeonI_SHLL_Patterns<ValueType OpTy, ValueType DesTy,
8274 SDPatternOperator ExtOp, Operand Neon_Imm,
8277 (DesTy (ExtOp (OpTy VPR64:$Rn))),
8279 (i32 Neon_Imm:$Imm))))),
8280 (!cast<Instruction>("SHLL" # suffix) VPR64:$Rn, Neon_Imm:$Imm)>;
8282 class NeonI_SHLL_High_Patterns<ValueType OpTy, ValueType DesTy,
8283 SDPatternOperator ExtOp, Operand Neon_Imm,
8284 string suffix, PatFrag GetHigh>
8287 (OpTy (GetHigh VPR128:$Rn)))),
8289 (i32 Neon_Imm:$Imm))))),
8290 (!cast<Instruction>("SHLL" # suffix) VPR128:$Rn, Neon_Imm:$Imm)>;
// An extend followed by a shift-left by the full source element width
// is exactly shll; zext and sext both map onto the same instruction,
// since the shifted-out extension bits cannot affect the result.
8292 def : NeonI_SHLL_Patterns<v8i8, v8i16, zext, uimm_exact8, "8b8h">;
8293 def : NeonI_SHLL_Patterns<v8i8, v8i16, sext, uimm_exact8, "8b8h">;
8294 def : NeonI_SHLL_Patterns<v4i16, v4i32, zext, uimm_exact16, "4h4s">;
8295 def : NeonI_SHLL_Patterns<v4i16, v4i32, sext, uimm_exact16, "4h4s">;
8296 def : NeonI_SHLL_Patterns<v2i32, v2i64, zext, uimm_exact32, "2s2d">;
8297 def : NeonI_SHLL_Patterns<v2i32, v2i64, sext, uimm_exact32, "2s2d">;
8298 def : NeonI_SHLL_High_Patterns<v8i8, v8i16, zext, uimm_exact8, "16b8h",
8300 def : NeonI_SHLL_High_Patterns<v8i8, v8i16, sext, uimm_exact8, "16b8h",
8302 def : NeonI_SHLL_High_Patterns<v4i16, v4i32, zext, uimm_exact16, "8h4s",
8304 def : NeonI_SHLL_High_Patterns<v4i16, v4i32, sext, uimm_exact16, "8h4s",
8306 def : NeonI_SHLL_High_Patterns<v2i32, v2i64, zext, uimm_exact32, "4s2d",
8308 def : NeonI_SHLL_High_Patterns<v2i32, v2i64, sext, uimm_exact32, "4s2d",
8311 multiclass NeonI_2VMisc_SD_Narrow<string asmop, bit U, bits<5> opcode> {
8312 def 4s4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
8313 (outs VPR64:$Rd), (ins VPR128:$Rn),
8314 asmop # "\t$Rd.4h, $Rn.4s",
8317 def 2d2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
8318 (outs VPR64:$Rd), (ins VPR128:$Rn),
8319 asmop # "\t$Rd.2s, $Rn.2d",
8322 let Constraints = "$src = $Rd" in {
8323 def 4s8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
8324 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
8325 asmop # "2\t$Rd.8h, $Rn.4s",
8328 def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
8329 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
8330 asmop # "2\t$Rd.4s, $Rn.2d",
// fcvtn/fcvtn2: floating-point convert to narrower precision
// (patterns are attached separately below).
8335 defm FCVTN : NeonI_2VMisc_SD_Narrow<"fcvtn", 0b0, 0b10110>;
8337 multiclass NeonI_2VMisc_Narrow_Pattern<string prefix,
8338 SDPatternOperator f32_to_f16_Op,
8339 SDPatternOperator f64_to_f32_Op> {
8341 def : Pat<(v4i16 (f32_to_f16_Op (v4f32 VPR128:$Rn))),
8342 (!cast<Instruction>(prefix # "4s4h") (v4f32 VPR128:$Rn))>;
8344 def : Pat<(v8i16 (concat_vectors
8346 (v4i16 (f32_to_f16_Op (v4f32 VPR128:$Rn))))),
8347 (!cast<Instruction>(prefix # "4s8h")
8348 (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)),
8349 (v4f32 VPR128:$Rn))>;
8351 def : Pat<(v2f32 (f64_to_f32_Op (v2f64 VPR128:$Rn))),
8352 (!cast<Instruction>(prefix # "2d2s") (v2f64 VPR128:$Rn))>;
8354 def : Pat<(v4f32 (concat_vectors
8356 (v2f32 (f64_to_f32_Op (v2f64 VPR128:$Rn))))),
8357 (!cast<Instruction>(prefix # "2d4s")
8358 (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)),
8359 (v2f64 VPR128:$Rn))>;
// FCVTN pattern hookup: f32->f16 uses the ARM-shared vcvtfp2hf
// intrinsic, f64->f32 the generic fround node.
8362 defm : NeonI_2VMisc_Narrow_Pattern<"FCVTN", int_arm_neon_vcvtfp2hf, fround>;
8364 multiclass NeonI_2VMisc_D_Narrow<string asmop, string prefix, bit U,
8366 def 2d2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
8367 (outs VPR64:$Rd), (ins VPR128:$Rn),
8368 asmop # "\t$Rd.2s, $Rn.2d",
8371 def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
8372 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
8373 asmop # "2\t$Rd.4s, $Rn.2d",
8375 let Constraints = "$src = $Rd";
8378 def : Pat<(v2f32 (int_aarch64_neon_vcvtxn (v2f64 VPR128:$Rn))),
8379 (!cast<Instruction>(prefix # "2d2s") VPR128:$Rn)>;
8381 def : Pat<(v4f32 (concat_vectors
8383 (v2f32 (int_aarch64_neon_vcvtxn (v2f64 VPR128:$Rn))))),
8384 (!cast<Instruction>(prefix # "2d4s")
8385 (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)),
// fcvtxn/fcvtxn2, selected via int_aarch64_neon_vcvtxn in the
// patterns embedded in NeonI_2VMisc_D_Narrow above.
8389 defm FCVTXN : NeonI_2VMisc_D_Narrow<"fcvtxn","FCVTXN", 0b1, 0b10110>;
// Matches the high two lanes (starting at lane 2) of a v4f32 vector.
8391 def Neon_High4Float : PatFrag<(ops node:$in),
8392 (extract_subvector (v4f32 node:$in), (iPTR 2))>;
8394 multiclass NeonI_2VMisc_HS_Extend<string asmop, bit U, bits<5> opcode> {
8395 def 4h4s : NeonI_2VMisc<0b0, U, 0b00, opcode,
8396 (outs VPR128:$Rd), (ins VPR64:$Rn),
8397 asmop # "\t$Rd.4s, $Rn.4h",
8400 def 2s2d : NeonI_2VMisc<0b0, U, 0b01, opcode,
8401 (outs VPR128:$Rd), (ins VPR64:$Rn),
8402 asmop # "\t$Rd.2d, $Rn.2s",
8405 def 8h4s : NeonI_2VMisc<0b1, U, 0b00, opcode,
8406 (outs VPR128:$Rd), (ins VPR128:$Rn),
8407 asmop # "2\t$Rd.4s, $Rn.8h",
8410 def 4s2d : NeonI_2VMisc<0b1, U, 0b01, opcode,
8411 (outs VPR128:$Rd), (ins VPR128:$Rn),
8412 asmop # "2\t$Rd.2d, $Rn.4s",
// fcvtl/fcvtl2: floating-point convert to wider precision.
8416 defm FCVTL : NeonI_2VMisc_HS_Extend<"fcvtl", 0b0, 0b10111>;
8418 multiclass NeonI_2VMisc_Extend_Pattern<string prefix> {
8419 def : Pat<(v4f32 (int_arm_neon_vcvthf2fp (v4i16 VPR64:$Rn))),
8420 (!cast<Instruction>(prefix # "4h4s") VPR64:$Rn)>;
8422 def : Pat<(v4f32 (int_arm_neon_vcvthf2fp
8424 (v8i16 VPR128:$Rn))))),
8425 (!cast<Instruction>(prefix # "8h4s") VPR128:$Rn)>;
8427 def : Pat<(v2f64 (fextend (v2f32 VPR64:$Rn))),
8428 (!cast<Instruction>(prefix # "2s2d") VPR64:$Rn)>;
8430 def : Pat<(v2f64 (fextend
8431 (v2f32 (Neon_High4Float
8432 (v4f32 VPR128:$Rn))))),
8433 (!cast<Instruction>(prefix # "4s2d") VPR128:$Rn)>;
// Attach the f16->f32 (vcvthf2fp) and f32->f64 (fextend) patterns to
// the FCVTL instructions.
8436 defm : NeonI_2VMisc_Extend_Pattern<"FCVTL">;
8438 multiclass NeonI_2VMisc_SD_Conv<string asmop, bit Size, bit U, bits<5> opcode,
8439 ValueType ResTy4s, ValueType OpTy4s,
8440 ValueType ResTy2d, ValueType OpTy2d,
8441 ValueType ResTy2s, ValueType OpTy2s,
8442 SDPatternOperator Neon_Op> {
8444 def 4s : NeonI_2VMisc<0b1, U, {Size, 0b0}, opcode,
8445 (outs VPR128:$Rd), (ins VPR128:$Rn),
8446 asmop # "\t$Rd.4s, $Rn.4s",
8447 [(set (ResTy4s VPR128:$Rd),
8448 (ResTy4s (Neon_Op (OpTy4s VPR128:$Rn))))],
8451 def 2d : NeonI_2VMisc<0b1, U, {Size, 0b1}, opcode,
8452 (outs VPR128:$Rd), (ins VPR128:$Rn),
8453 asmop # "\t$Rd.2d, $Rn.2d",
8454 [(set (ResTy2d VPR128:$Rd),
8455 (ResTy2d (Neon_Op (OpTy2d VPR128:$Rn))))],
8458 def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode,
8459 (outs VPR64:$Rd), (ins VPR64:$Rn),
8460 asmop # "\t$Rd.2s, $Rn.2s",
8461 [(set (ResTy2s VPR64:$Rd),
8462 (ResTy2s (Neon_Op (OpTy2s VPR64:$Rn))))],
8466 multiclass NeonI_2VMisc_fp_to_int<string asmop, bit Size, bit U,
8467 bits<5> opcode, SDPatternOperator Neon_Op> {
8468 defm _ : NeonI_2VMisc_SD_Conv<asmop, Size, U, opcode, v4i32, v4f32, v2i64,
8469 v2f64, v2i32, v2f32, Neon_Op>;
// FP -> integer conversion family. FCVTZS/FCVTZU map onto the generic
// fp_to_sint/fp_to_uint nodes; the explicit rounding-mode variants
// (N/P/M/A x signed/unsigned) select through the ARM-shared vcvt*
// intrinsics. Size and U bits plus the opcode distinguish the ten
// combinations.
8472 defm FCVTNS : NeonI_2VMisc_fp_to_int<"fcvtns", 0b0, 0b0, 0b11010,
8473 int_arm_neon_vcvtns>;
8474 defm FCVTNU : NeonI_2VMisc_fp_to_int<"fcvtnu", 0b0, 0b1, 0b11010,
8475 int_arm_neon_vcvtnu>;
8476 defm FCVTPS : NeonI_2VMisc_fp_to_int<"fcvtps", 0b1, 0b0, 0b11010,
8477 int_arm_neon_vcvtps>;
8478 defm FCVTPU : NeonI_2VMisc_fp_to_int<"fcvtpu", 0b1, 0b1, 0b11010,
8479 int_arm_neon_vcvtpu>;
8480 defm FCVTMS : NeonI_2VMisc_fp_to_int<"fcvtms", 0b0, 0b0, 0b11011,
8481 int_arm_neon_vcvtms>;
8482 defm FCVTMU : NeonI_2VMisc_fp_to_int<"fcvtmu", 0b0, 0b1, 0b11011,
8483 int_arm_neon_vcvtmu>;
8484 defm FCVTZS : NeonI_2VMisc_fp_to_int<"fcvtzs", 0b1, 0b0, 0b11011, fp_to_sint>;
8485 defm FCVTZU : NeonI_2VMisc_fp_to_int<"fcvtzu", 0b1, 0b1, 0b11011, fp_to_uint>;
8486 defm FCVTAS : NeonI_2VMisc_fp_to_int<"fcvtas", 0b0, 0b0, 0b11100,
8487 int_arm_neon_vcvtas>;
8488 defm FCVTAU : NeonI_2VMisc_fp_to_int<"fcvtau", 0b0, 0b1, 0b11100,
8489 int_arm_neon_vcvtau>;
8491 multiclass NeonI_2VMisc_int_to_fp<string asmop, bit Size, bit U,
8492 bits<5> opcode, SDPatternOperator Neon_Op> {
8493 defm _ : NeonI_2VMisc_SD_Conv<asmop, Size, U, opcode, v4f32, v4i32, v2f64,
8494 v2i64, v2f32, v2i32, Neon_Op>;
// Integer -> FP conversions, selected from the generic
// sint_to_fp/uint_to_fp nodes.
8497 defm SCVTF : NeonI_2VMisc_int_to_fp<"scvtf", 0b0, 0b0, 0b11101, sint_to_fp>;
8498 defm UCVTF : NeonI_2VMisc_int_to_fp<"ucvtf", 0b0, 0b1, 0b11101, uint_to_fp>;
8500 multiclass NeonI_2VMisc_fp_to_fp<string asmop, bit Size, bit U,
8501 bits<5> opcode, SDPatternOperator Neon_Op> {
8502 defm _ : NeonI_2VMisc_SD_Conv<asmop, Size, U, opcode, v4f32, v4f32, v2f64,
8503 v2f64, v2f32, v2f32, Neon_Op>;
// FP round-to-integral family: FRINTN goes through an AArch64-specific
// intrinsic, the rest map onto the generic rounding nodes
// (frnd/fceil/ffloor/frint/ftrunc/fnearbyint). Also the reciprocal and
// reciprocal-sqrt estimates (ARM-shared intrinsics) and full fsqrt.
8506 defm FRINTN : NeonI_2VMisc_fp_to_fp<"frintn", 0b0, 0b0, 0b11000,
8507 int_aarch64_neon_frintn>;
8508 defm FRINTA : NeonI_2VMisc_fp_to_fp<"frinta", 0b0, 0b1, 0b11000, frnd>;
8509 defm FRINTP : NeonI_2VMisc_fp_to_fp<"frintp", 0b1, 0b0, 0b11000, fceil>;
8510 defm FRINTM : NeonI_2VMisc_fp_to_fp<"frintm", 0b0, 0b0, 0b11001, ffloor>;
8511 defm FRINTX : NeonI_2VMisc_fp_to_fp<"frintx", 0b0, 0b1, 0b11001, frint>;
8512 defm FRINTZ : NeonI_2VMisc_fp_to_fp<"frintz", 0b1, 0b0, 0b11001, ftrunc>;
8513 defm FRINTI : NeonI_2VMisc_fp_to_fp<"frinti", 0b1, 0b1, 0b11001, fnearbyint>;
8514 defm FRECPE : NeonI_2VMisc_fp_to_fp<"frecpe", 0b1, 0b0, 0b11101,
8515 int_arm_neon_vrecpe>;
8516 defm FRSQRTE : NeonI_2VMisc_fp_to_fp<"frsqrte", 0b1, 0b1, 0b11101,
8517 int_arm_neon_vrsqrte>;
8518 defm FSQRT : NeonI_2VMisc_fp_to_fp<"fsqrt", 0b1, 0b1, 0b11111, fsqrt>;
8520 multiclass NeonI_2VMisc_S_Conv<string asmop, bit Size, bit U,
8521 bits<5> opcode, SDPatternOperator Neon_Op> {
8522 def 4s : NeonI_2VMisc<0b1, U, {Size, 0b0}, opcode,
8523 (outs VPR128:$Rd), (ins VPR128:$Rn),
8524 asmop # "\t$Rd.4s, $Rn.4s",
8525 [(set (v4i32 VPR128:$Rd),
8526 (v4i32 (Neon_Op (v4i32 VPR128:$Rn))))],
8529 def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode,
8530 (outs VPR64:$Rd), (ins VPR64:$Rn),
8531 asmop # "\t$Rd.2s, $Rn.2s",
8532 [(set (v2i32 VPR64:$Rd),
8533 (v2i32 (Neon_Op (v2i32 VPR64:$Rn))))],
// Unsigned integer reciprocal / reciprocal-sqrt estimates; S-sized
// lanes only, reusing the ARM vrecpe/vrsqrte intrinsics on i32 lanes.
8537 defm URECPE : NeonI_2VMisc_S_Conv<"urecpe", 0b1, 0b0, 0b11100,
8538 int_arm_neon_vrecpe>;
8539 defm URSQRTE : NeonI_2VMisc_S_Conv<"ursqrte", 0b1, 0b1, 0b11100,
8540 int_arm_neon_vrsqrte>;
8543 class NeonI_Cryptoaes_2v<bits<2> size, bits<5> opcode,
8544 string asmop, SDPatternOperator opnode>
8545 : NeonI_Crypto_AES<size, opcode,
8546 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
8547 asmop # "\t$Rd.16b, $Rn.16b",
8548 [(set (v16i8 VPR128:$Rd),
8549 (v16i8 (opnode (v16i8 VPR128:$src),
8550 (v16i8 VPR128:$Rn))))],
8552 let Constraints = "$src = $Rd";
8553 let Predicates = [HasNEON, HasCrypto];
// AES encrypt/decrypt round instructions; the class above ties $src
// to $Rd, so the 16-byte state is updated in place.
8556 def AESE : NeonI_Cryptoaes_2v<0b00, 0b00100, "aese", int_arm_neon_aese>;
8557 def AESD : NeonI_Cryptoaes_2v<0b00, 0b00101, "aesd", int_arm_neon_aesd>;
8559 class NeonI_Cryptoaes<bits<2> size, bits<5> opcode,
8560 string asmop, SDPatternOperator opnode>
8561 : NeonI_Crypto_AES<size, opcode,
8562 (outs VPR128:$Rd), (ins VPR128:$Rn),
8563 asmop # "\t$Rd.16b, $Rn.16b",
8564 [(set (v16i8 VPR128:$Rd),
8565 (v16i8 (opnode (v16i8 VPR128:$Rn))))],
// AES mix-columns / inverse mix-columns: single v16i8 input, no tied
// operand (uses the one-operand NeonI_Cryptoaes class above).
8568 def AESMC : NeonI_Cryptoaes<0b00, 0b00110, "aesmc", int_arm_neon_aesmc>;
8569 def AESIMC : NeonI_Cryptoaes<0b00, 0b00111, "aesimc", int_arm_neon_aesimc>;
8571 class NeonI_Cryptosha_vv<bits<2> size, bits<5> opcode,
8572 string asmop, SDPatternOperator opnode>
8573 : NeonI_Crypto_SHA<size, opcode,
8574 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
8575 asmop # "\t$Rd.4s, $Rn.4s",
8576 [(set (v4i32 VPR128:$Rd),
8577 (v4i32 (opnode (v4i32 VPR128:$src),
8578 (v4i32 VPR128:$Rn))))],
8580 let Constraints = "$src = $Rd";
8581 let Predicates = [HasNEON, HasCrypto];
// Two-operand SHA schedule-update instructions (v4i32, accumulator
// tied via the class above), lowered from the ARM-shared intrinsics.
8584 def SHA1SU1 : NeonI_Cryptosha_vv<0b00, 0b00001, "sha1su1",
8585 int_arm_neon_sha1su1>;
8586 def SHA256SU0 : NeonI_Cryptosha_vv<0b00, 0b00010, "sha256su0",
8587 int_arm_neon_sha256su0>;
8589 class NeonI_Cryptosha_ss<bits<2> size, bits<5> opcode,
8590 string asmop, SDPatternOperator opnode>
8591 : NeonI_Crypto_SHA<size, opcode,
8592 (outs FPR32:$Rd), (ins FPR32:$Rn),
8593 asmop # "\t$Rd, $Rn",
8594 [(set (v1i32 FPR32:$Rd),
8595 (v1i32 (opnode (v1i32 FPR32:$Rn))))],
8597 let Predicates = [HasNEON, HasCrypto];
// sha1h: scalar SHA-1 operation on a single 32-bit value, modeled as
// v1i32 in an FPR32 register.
8600 def SHA1H : NeonI_Cryptosha_ss<0b00, 0b00000, "sha1h", int_arm_neon_sha1h>;
8602 class NeonI_Cryptosha3_vvv<bits<2> size, bits<3> opcode, string asmop,
8603 SDPatternOperator opnode>
8604 : NeonI_Crypto_3VSHA<size, opcode,
8606 (ins VPR128:$src, VPR128:$Rn, VPR128:$Rm),
8607 asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
8608 [(set (v4i32 VPR128:$Rd),
8609 (v4i32 (opnode (v4i32 VPR128:$src),
8611 (v4i32 VPR128:$Rm))))],
8613 let Constraints = "$src = $Rd";
8614 let Predicates = [HasNEON, HasCrypto];
// Three-vector SHA schedule updates (v4i32 x 3, accumulator tied).
8617 def SHA1SU0 : NeonI_Cryptosha3_vvv<0b00, 0b011, "sha1su0",
8618 int_arm_neon_sha1su0>;
8619 def SHA256SU1 : NeonI_Cryptosha3_vvv<0b00, 0b110, "sha256su1",
8620 int_arm_neon_sha256su1>;
8622 class NeonI_Cryptosha3_qqv<bits<2> size, bits<3> opcode, string asmop,
8623 SDPatternOperator opnode>
8624 : NeonI_Crypto_3VSHA<size, opcode,
8626 (ins FPR128:$src, FPR128:$Rn, VPR128:$Rm),
8627 asmop # "\t$Rd, $Rn, $Rm.4s",
8628 [(set (v4i32 FPR128:$Rd),
8629 (v4i32 (opnode (v4i32 FPR128:$src),
8631 (v4i32 VPR128:$Rm))))],
8633 let Constraints = "$src = $Rd";
8634 let Predicates = [HasNEON, HasCrypto];
// SHA-256 hash updates: 128-bit state operands in FPR128 plus a .4s
// vector operand (qqv form, state tied via the class above).
8637 def SHA256H : NeonI_Cryptosha3_qqv<0b00, 0b100, "sha256h",
8638 int_arm_neon_sha256h>;
8639 def SHA256H2 : NeonI_Cryptosha3_qqv<0b00, 0b101, "sha256h2",
8640 int_arm_neon_sha256h2>;
8642 class NeonI_Cryptosha3_qsv<bits<2> size, bits<3> opcode, string asmop,
8643 SDPatternOperator opnode>
8644 : NeonI_Crypto_3VSHA<size, opcode,
8646 (ins FPR128:$src, FPR32:$Rn, VPR128:$Rm),
8647 asmop # "\t$Rd, $Rn, $Rm.4s",
8648 [(set (v4i32 FPR128:$Rd),
8649 (v4i32 (opnode (v4i32 FPR128:$src),
8651 (v4i32 VPR128:$Rm))))],
8653 let Constraints = "$src = $Rd";
8654 let Predicates = [HasNEON, HasCrypto];
// SHA-1 hash updates: 128-bit state (FPR128), 32-bit operand (FPR32)
// and a .4s schedule vector (qsv form, state tied via the class above).
8657 def SHA1C : NeonI_Cryptosha3_qsv<0b00, 0b000, "sha1c", int_aarch64_neon_sha1c>;
8658 def SHA1P : NeonI_Cryptosha3_qsv<0b00, 0b001, "sha1p", int_aarch64_neon_sha1p>;
8659 def SHA1M : NeonI_Cryptosha3_qsv<0b00, 0b010, "sha1m", int_aarch64_neon_sha1m>;
8662 // Patterns for handling half-precision values
8665 // Convert f16 value coming in as i16 value to f32
// f16 (held as i16 in a GPR) -> f32: move the GPR into an FP register,
// take its low 16-bit subregister and convert with FCVTsh. Two entry
// forms: an explicit mask with 65535, or a value already known
// zero-extended (assertzext).
8666 def : Pat<(f32 (f16_to_f32 (i32 (and (i32 GPR32:$Rn), 65535)))),
8667 (FCVTsh (EXTRACT_SUBREG (FMOVsw GPR32:$Rn), sub_16))>;
8668 def : Pat<(f32 (f16_to_f32 (i32 (assertzext GPR32:$Rn)))),
8669 (FCVTsh (EXTRACT_SUBREG (FMOVsw GPR32:$Rn), sub_16))>;
8671 def : Pat<(f32 (f16_to_f32 (i32 (assertzext (i32 (
8672 f32_to_f16 (f32 FPR32:$Rn))))))),
8675 // Patterns for vector extract of half-precision FP value in i16 storage type
8676 def : Pat<(f32 (f16_to_f32 ( i32 (and (i32 (vector_extract
8677 (v4i16 VPR64:$Rn), neon_uimm2_bare:$Imm)), 65535)))),
8678 (FCVTsh (f16 (DUPhv_H
8679 (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
8680 neon_uimm2_bare:$Imm)))>;
8682 def : Pat<(f32 (f16_to_f32 ( i32 (and (i32 (vector_extract
8683 (v8i16 VPR128:$Rn), neon_uimm3_bare:$Imm)), 65535)))),
8684 (FCVTsh (f16 (DUPhv_H (v8i16 VPR128:$Rn), neon_uimm3_bare:$Imm)))>;
8686 // Patterns for vector insert of half-precision FP value 0 in i16 storage type
8687 def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn),
8688 (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 0))))))),
8689 (neon_uimm3_bare:$Imm))),
8690 (v8i16 (INSELh (v8i16 VPR128:$Rn),
8691 (v8i16 (SUBREG_TO_REG (i64 0),
8692 (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 WZR))), sub_16)),
8694 neon_uimm3_bare:$Imm, 0))>;
8696 def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn),
8697 (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 0))))))),
8698 (neon_uimm2_bare:$Imm))),
8699 (v4i16 (EXTRACT_SUBREG
8701 (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
8702 (v8i16 (SUBREG_TO_REG (i64 0),
8703 (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 WZR))), sub_16)),
8705 neon_uimm2_bare:$Imm, 0)),
8708 // Patterns for vector insert of half-precision FP value in i16 storage type
8709 def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn),
8710 (i32 (assertsext (i32 (fp_to_sint
8711 (f32 (f16_to_f32 (i32 (and (i32 GPR32:$src), 65535)))))))),
8712 (neon_uimm3_bare:$Imm))),
8713 (v8i16 (INSELh (v8i16 VPR128:$Rn),
8714 (v8i16 (SUBREG_TO_REG (i64 0),
8715 (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 GPR32:$src))), sub_16)),
8717 neon_uimm3_bare:$Imm, 0))>;
8719 def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn),
8720 (i32 (assertsext (i32 (fp_to_sint
8721 (f32 (f16_to_f32 (i32 (and (i32 GPR32:$src), 65535)))))))),
8722 (neon_uimm2_bare:$Imm))),
8723 (v4i16 (EXTRACT_SUBREG
8725 (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
8726 (v8i16 (SUBREG_TO_REG (i64 0),
8727 (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 GPR32:$src))), sub_16)),
8729 neon_uimm2_bare:$Imm, 0)),
// Lane-to-lane copy of an i16 element between two v8i16 vectors
// (extract from $src lane Imm2, insert into $Rn lane Imm1) selects a
// single INSELh.
8732 def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn),
8733 (i32 (vector_extract (v8i16 VPR128:$src), neon_uimm3_bare:$Imm2)),
8734 (neon_uimm3_bare:$Imm1))),
8735 (v8i16 (INSELh (v8i16 VPR128:$Rn), (v8i16 VPR128:$src),
8736 neon_uimm3_bare:$Imm1, neon_uimm3_bare:$Imm2))>;
8738 // Patterns for vector copy of half-precision FP value in i16 storage type
8739 def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn),
8740 (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 (and (i32
8741 (vector_extract (v8i16 VPR128:$src), neon_uimm3_bare:$Imm2)),
8743 (neon_uimm3_bare:$Imm1))),
8744 (v8i16 (INSELh (v8i16 VPR128:$Rn), (v8i16 VPR128:$src),
8745 neon_uimm3_bare:$Imm1, neon_uimm3_bare:$Imm2))>;
8747 def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn),
8748 (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 (and (i32
8749 (vector_extract (v4i16 VPR64:$src), neon_uimm3_bare:$Imm2)),
8751 (neon_uimm3_bare:$Imm1))),
8752 (v4i16 (EXTRACT_SUBREG
8754 (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
8755 (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
8756 neon_uimm3_bare:$Imm1, neon_uimm3_bare:$Imm2)),