1 //===-- AArch64InstrNEON.td - NEON support for AArch64 -----*- tablegen -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file describes the AArch64 NEON instruction set.
12 //===----------------------------------------------------------------------===//
14 //===----------------------------------------------------------------------===//
15 // NEON-specific DAG Nodes.
16 //===----------------------------------------------------------------------===//
// Custom SelectionDAG nodes produced by AArch64 ISel lowering for NEON.
// Each SDNode name must match an enumerator in AArch64ISD (AArch64ISelLowering).
18 // (outs Result), (ins Imm, OpCmode)
19 def SDT_Neon_movi : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
// Vector move-immediate / move-inverted-immediate; operand 1 is the encoded
// immediate, operand 2 the cmode field (profile only constrains operand 1).
21 def Neon_movi : SDNode<"AArch64ISD::NEON_MOVIMM", SDT_Neon_movi>;
23 def Neon_mvni : SDNode<"AArch64ISD::NEON_MVNIMM", SDT_Neon_movi>;
25 // (outs Result), (ins Imm)
// Floating-point vector move-immediate (FMOV Vd.T, #imm).
26 def Neon_fmovi : SDNode<"AArch64ISD::NEON_FMOVIMM", SDTypeProfile<1, 1,
27 [SDTCisVec<0>, SDTCisVT<1, i32>]>>;
29 // (outs Result), (ins LHS, RHS, CondCode)
// Element-wise vector compare producing an all-ones/all-zeros mask per lane.
30 def Neon_cmp : SDNode<"AArch64ISD::NEON_CMP", SDTypeProfile<1, 3,
31 [SDTCisVec<0>, SDTCisSameAs<1, 2>]>>;
33 // (outs Result), (ins LHS, 0/0.0 constant, CondCode)
// Compare-against-zero form; operand 2 is the literal 0 / 0.0 constant.
34 def Neon_cmpz : SDNode<"AArch64ISD::NEON_CMPZ", SDTypeProfile<1, 3,
35 [SDTCisVec<0>, SDTCisVec<1>]>>;
37 // (outs Result), (ins LHS, RHS)
// Bitwise test (CMTST): lane is all-ones iff (LHS & RHS) != 0 in that lane.
38 def Neon_tst : SDNode<"AArch64ISD::NEON_TST", SDTypeProfile<1, 2,
39 [SDTCisVec<0>, SDTCisSameAs<1, 2>]>>;
// Saturating/rounding shift-by-immediate and permutation DAG nodes.
// NOTE(review): the embedded original line numbers skip here (41 -> 43,
// 46 -> 48, 59 -> 61); the tails of the SDTARMVSH, SDTPERMUTE and Neon_vdup
// SDTypeProfile operand-constraint lists appear to be missing from this dump
// — confirm against the upstream file before relying on these profiles.
41 def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
43 def Neon_sqrshlImm : SDNode<"AArch64ISD::NEON_QSHLs", SDTARMVSH>;
44 def Neon_uqrshlImm : SDNode<"AArch64ISD::NEON_QSHLu", SDTARMVSH>;
// Two-vector permutes: unzip, zip and transpose, each with a part-1/part-2 pair.
46 def SDTPERMUTE : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
48 def Neon_uzp1 : SDNode<"AArch64ISD::NEON_UZP1", SDTPERMUTE>;
49 def Neon_uzp2 : SDNode<"AArch64ISD::NEON_UZP2", SDTPERMUTE>;
50 def Neon_zip1 : SDNode<"AArch64ISD::NEON_ZIP1", SDTPERMUTE>;
51 def Neon_zip2 : SDNode<"AArch64ISD::NEON_ZIP2", SDTPERMUTE>;
52 def Neon_trn1 : SDNode<"AArch64ISD::NEON_TRN1", SDTPERMUTE>;
53 def Neon_trn2 : SDNode<"AArch64ISD::NEON_TRN2", SDTPERMUTE>;
// Single-vector shuffles: element reversal within 64/32/16-bit containers.
55 def SDTVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
56 def Neon_rev64 : SDNode<"AArch64ISD::NEON_REV64", SDTVSHUF>;
57 def Neon_rev32 : SDNode<"AArch64ISD::NEON_REV32", SDTVSHUF>;
58 def Neon_rev16 : SDNode<"AArch64ISD::NEON_REV16", SDTVSHUF>;
// Broadcast a scalar (vdup) or a selected lane (vduplane, lane index is i64)
// to every element of the result vector.
59 def Neon_vdup : SDNode<"AArch64ISD::NEON_VDUP", SDTypeProfile<1, 1,
61 def Neon_vduplane : SDNode<"AArch64ISD::NEON_VDUPLANE", SDTypeProfile<1, 2,
62 [SDTCisVec<0>, SDTCisVec<1>, SDTCisVT<2, i64>]>>;
// EXT-style extract: concatenate operands 1 and 2 and take a window starting
// at the byte position given by the i64 operand 3.
63 def Neon_vextract : SDNode<"AArch64ISD::NEON_VEXTRACT", SDTypeProfile<1, 3,
64 [SDTCisVec<0>, SDTCisSameAs<0, 1>,
65 SDTCisSameAs<0, 2>, SDTCisVT<3, i64>]>>;
67 //===----------------------------------------------------------------------===//
68 // Addressing-mode instantiations
69 //===----------------------------------------------------------------------===//
// Load/store pattern instantiations: bind the generic ls_neutral_pats
// multiclass to the 64-bit (LSFP64) and 128-bit (LSFP128) FP load/store
// instructions, substituting the appropriate uimm12 offset operand and
// minimum-alignment predicate into the caller-supplied address pattern.
// NOTE(review): the embedded line numbers skip 78-80 and 88-90 — the final
// ls_neutral_pats argument (the ValueType) and the closing braces of both
// multiclasses are missing from this dump; verify against upstream.
71 multiclass ls_64_pats<dag address, dag Base, dag Offset, ValueType Ty> {
72 defm : ls_neutral_pats<LSFP64_LDR, LSFP64_STR, Base,
73 !foreach(decls.pattern, Offset,
74 !subst(OFFSET, dword_uimm12, decls.pattern)),
75 !foreach(decls.pattern, address,
76 !subst(OFFSET, dword_uimm12,
77 !subst(ALIGN, min_align8, decls.pattern))),
81 multiclass ls_128_pats<dag address, dag Base, dag Offset, ValueType Ty> {
82 defm : ls_neutral_pats<LSFP128_LDR, LSFP128_STR, Base,
83 !foreach(decls.pattern, Offset,
84 !subst(OFFSET, qword_uimm12, decls.pattern)),
85 !foreach(decls.pattern, address,
86 !subst(OFFSET, qword_uimm12,
87 !subst(ALIGN, min_align16, decls.pattern))),
// Instantiate the load/store patterns for every legal 64-bit and 128-bit
// NEON vector type.
91 multiclass uimm12_neon_pats<dag address, dag Base, dag Offset> {
92 defm : ls_64_pats<address, Base, Offset, v8i8>;
93 defm : ls_64_pats<address, Base, Offset, v4i16>;
94 defm : ls_64_pats<address, Base, Offset, v2i32>;
95 defm : ls_64_pats<address, Base, Offset, v1i64>;
96 defm : ls_64_pats<address, Base, Offset, v2f32>;
97 defm : ls_64_pats<address, Base, Offset, v1f64>;
99 defm : ls_128_pats<address, Base, Offset, v16i8>;
100 defm : ls_128_pats<address, Base, Offset, v8i16>;
101 defm : ls_128_pats<address, Base, Offset, v4i32>;
102 defm : ls_128_pats<address, Base, Offset, v2i64>;
103 defm : ls_128_pats<address, Base, Offset, v4f32>;
104 defm : ls_128_pats<address, Base, Offset, v2f64>;
// Constant-pool addressing: ADRP of the page plus a lo12 offset.
107 defm : uimm12_neon_pats<(A64WrapperSmall
108 tconstpool:$Hi, tconstpool:$Lo12, ALIGN),
109 (ADRPxi tconstpool:$Hi), (i64 tconstpool:$Lo12)>;
111 //===----------------------------------------------------------------------===//
113 //===----------------------------------------------------------------------===//
// Three-same-operand instructions that only exist for byte element sizes:
// emits the .8b (D-register) and .16b (Q-register) variants. Separate
// opnode8B/opnode16B parameters allow different selection operators per width.
// NOTE(review): embedded line numbers skip 125-126 and 132-136 — the
// NoItinerary arguments and closing braces are missing from this dump.
115 multiclass NeonI_3VSame_B_sizes<bit u, bits<2> size, bits<5> opcode,
116 string asmop, SDPatternOperator opnode8B,
117 SDPatternOperator opnode16B,
118 bit Commutable = 0> {
119 let isCommutable = Commutable in {
120 def _8B : NeonI_3VSame<0b0, u, size, opcode,
121 (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
122 asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
123 [(set (v8i8 VPR64:$Rd),
124 (v8i8 (opnode8B (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))],
127 def _16B : NeonI_3VSame<0b1, u, size, opcode,
128 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
129 asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
130 [(set (v16i8 VPR128:$Rd),
131 (v16i8 (opnode16B (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))],
// Three-same-operand instructions for halfword and word element sizes:
// .4h/.8h (size=0b01) and .2s/.4s (size=0b10) variants.
// NOTE(review): NoItinerary arguments / braces between variants are missing
// from this dump (line numbers skip 146-147, 153-154, 160-161, 167-169).
137 multiclass NeonI_3VSame_HS_sizes<bit u, bits<5> opcode,
138 string asmop, SDPatternOperator opnode,
139 bit Commutable = 0> {
140 let isCommutable = Commutable in {
141 def _4H : NeonI_3VSame<0b0, u, 0b01, opcode,
142 (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
143 asmop # "\t$Rd.4h, $Rn.4h, $Rm.4h",
144 [(set (v4i16 VPR64:$Rd),
145 (v4i16 (opnode (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))))],
148 def _8H : NeonI_3VSame<0b1, u, 0b01, opcode,
149 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
150 asmop # "\t$Rd.8h, $Rn.8h, $Rm.8h",
151 [(set (v8i16 VPR128:$Rd),
152 (v8i16 (opnode (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))))],
155 def _2S : NeonI_3VSame<0b0, u, 0b10, opcode,
156 (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
157 asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
158 [(set (v2i32 VPR64:$Rd),
159 (v2i32 (opnode (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))))],
162 def _4S : NeonI_3VSame<0b1, u, 0b10, opcode,
163 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
164 asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
165 [(set (v4i32 VPR128:$Rd),
166 (v4i32 (opnode (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))))],
// Byte + halfword + word variants: inherits the H/S variants from
// NeonI_3VSame_HS_sizes and adds the .8b/.16b (size=0b00) forms.
// NOTE(review): the "bit Commutable = 0>" parameter line and the closing
// braces appear to be missing from this dump (line numbers skip 172,
// 180-181, 187-190).
170 multiclass NeonI_3VSame_BHS_sizes<bit u, bits<5> opcode,
171 string asmop, SDPatternOperator opnode,
173 : NeonI_3VSame_HS_sizes<u, opcode, asmop, opnode, Commutable> {
174 let isCommutable = Commutable in {
175 def _8B : NeonI_3VSame<0b0, u, 0b00, opcode,
176 (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
177 asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
178 [(set (v8i8 VPR64:$Rd),
179 (v8i8 (opnode (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))],
182 def _16B : NeonI_3VSame<0b1, u, 0b00, opcode,
183 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
184 asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
185 [(set (v16i8 VPR128:$Rd),
186 (v16i8 (opnode (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))],
// Full B/H/S/D coverage: inherits B/H/S from NeonI_3VSame_BHS_sizes and adds
// the .2d (size=0b11, Q-only) variant. There is no 64-bit-register .1d form.
// NOTE(review): the "bit Commutable = 0>" line and closing braces are missing
// from this dump (line numbers skip 193, 201-204).
191 multiclass NeonI_3VSame_BHSD_sizes<bit u, bits<5> opcode,
192 string asmop, SDPatternOperator opnode,
194 : NeonI_3VSame_BHS_sizes<u, opcode, asmop, opnode, Commutable> {
195 let isCommutable = Commutable in {
196 def _2D : NeonI_3VSame<0b1, u, 0b11, opcode,
197 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
198 asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
199 [(set (v2i64 VPR128:$Rd),
200 (v2i64 (opnode (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))))],
205 // Multiclass NeonI_3VSame_SD_sizes: Operand types are floating point types,
206 // but Result types can be integer or floating point types.
// The `size` bit combines with a fixed low bit to select S ({size,0}) vs
// D ({size,1}) element encodings. ResTy2S/4S/2D let FP compares produce
// integer mask vectors while arithmetic produces FP vectors.
207 multiclass NeonI_3VSame_SD_sizes<bit u, bit size, bits<5> opcode,
208 string asmop, SDPatternOperator opnode,
209 ValueType ResTy2S, ValueType ResTy4S,
210 ValueType ResTy2D, bit Commutable = 0> {
211 let isCommutable = Commutable in {
212 def _2S : NeonI_3VSame<0b0, u, {size, 0b0}, opcode,
213 (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
214 asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
215 [(set (ResTy2S VPR64:$Rd),
216 (ResTy2S (opnode (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))))],
219 def _4S : NeonI_3VSame<0b1, u, {size, 0b0}, opcode,
220 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
221 asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
222 [(set (ResTy4S VPR128:$Rd),
223 (ResTy4S (opnode (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))))],
226 def _2D : NeonI_3VSame<0b1, u, {size, 0b1}, opcode,
227 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
228 asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
229 [(set (ResTy2D VPR128:$Rd),
230 (ResTy2D (opnode (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))))],
235 //===----------------------------------------------------------------------===//
236 // Instruction Definitions
237 //===----------------------------------------------------------------------===//
239 // Vector Arithmetic Instructions
241 // Vector Add (Integer and Floating-Point)
// ADD is commutable (trailing 1); both integer add and fadd are defined via
// the size multiclasses above.
243 defm ADDvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b10000, "add", add, 1>;
244 defm FADDvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11010, "fadd", fadd,
245 v2f32, v4f32, v2f64, 1>;
247 // Patterns to match add of v1i8/v1i16/v1i32 types
// Scalar (single-element vector) arithmetic has no dedicated instruction for
// these widths; widen the FPR into the low lane of a 64-bit vector register
// via SUBREG_TO_REG, run the vector op, and (on the lines missing from this
// dump) extract the low lane back out.
// NOTE(review): the EXTRACT_SUBREG result lines of each pattern (original
// lines 249/252, 254/257, 259/262, ...) are absent from this dump.
248 def : Pat<(v1i8 (add FPR8:$Rn, FPR8:$Rm)),
250 (ADDvvv_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8),
251 (SUBREG_TO_REG (i64 0), FPR8:$Rm, sub_8)),
253 def : Pat<(v1i16 (add FPR16:$Rn, FPR16:$Rm)),
255 (ADDvvv_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16),
256 (SUBREG_TO_REG (i64 0), FPR16:$Rm, sub_16)),
258 def : Pat<(v1i32 (add FPR32:$Rn, FPR32:$Rm)),
260 (ADDvvv_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
261 (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32)),
264 // Vector Sub (Integer and Floating-Point)
// SUB is not commutable (trailing 0).
266 defm SUBvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b10000, "sub", sub, 0>;
267 defm FSUBvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11010, "fsub", fsub,
268 v2f32, v4f32, v2f64, 0>;
270 // Patterns to match sub of v1i8/v1i16/v1i32 types
271 def : Pat<(v1i8 (sub FPR8:$Rn, FPR8:$Rm)),
273 (SUBvvv_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8),
274 (SUBREG_TO_REG (i64 0), FPR8:$Rm, sub_8)),
276 def : Pat<(v1i16 (sub FPR16:$Rn, FPR16:$Rm)),
278 (SUBvvv_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16),
279 (SUBREG_TO_REG (i64 0), FPR16:$Rm, sub_16)),
281 def : Pat<(v1i32 (sub FPR32:$Rn, FPR32:$Rm)),
283 (SUBvvv_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
284 (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32)),
287 // Vector Multiply (Integer and Floating-Point)
// Integer MUL exists only for B/H/S element sizes (no .2d form in NEON).
289 defm MULvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10011, "mul", mul, 1>;
290 defm FMULvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11011, "fmul", fmul,
291 v2f32, v4f32, v2f64, 1>;
293 // Patterns to match mul of v1i8/v1i16/v1i32 types
294 def : Pat<(v1i8 (mul FPR8:$Rn, FPR8:$Rm)),
296 (MULvvv_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8),
297 (SUBREG_TO_REG (i64 0), FPR8:$Rm, sub_8)),
299 def : Pat<(v1i16 (mul FPR16:$Rn, FPR16:$Rm)),
301 (MULvvv_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16),
302 (SUBREG_TO_REG (i64 0), FPR16:$Rm, sub_16)),
304 def : Pat<(v1i32 (mul FPR32:$Rn, FPR32:$Rm)),
306 (MULvvv_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
307 (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32)),
310 // Vector Multiply (Polynomial)
// PMUL only exists for byte elements; reuses the ARM polynomial-multiply
// intrinsic for both register widths.
312 defm PMULvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b10011, "pmul",
313 int_arm_neon_vmulp, int_arm_neon_vmulp, 1>;
315 // Vector Multiply-accumulate and Multiply-subtract (Integer)
317 // class NeonI_3VSame_Constraint_impl: NeonI_3VSame with no data type and
318 // two operands constraints.
// Accumulating three-same instructions read and write $Rd, so the extra
// $src input is tied to $Rd via the Constraints string below.
319 class NeonI_3VSame_Constraint_impl<string asmop, string asmlane,
320 RegisterOperand VPRC, ValueType OpTy, bit q, bit u, bits<2> size,
321 bits<5> opcode, SDPatternOperator opnode>
322 : NeonI_3VSame<q, u, size, opcode,
323 (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, VPRC:$Rm),
324 asmop # "\t$Rd" # asmlane # ", $Rn" # asmlane # ", $Rm" # asmlane,
325 [(set (OpTy VPRC:$Rd),
326 (OpTy (opnode (OpTy VPRC:$src), (OpTy VPRC:$Rn), (OpTy VPRC:$Rm))))],
328 let Constraints = "$src = $Rd";
// mla/mls fold an add/sub of a multiply into a single accumulating op.
331 def Neon_mla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
332 (add node:$Ra, (mul node:$Rn, node:$Rm))>;
334 def Neon_mls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
335 (sub node:$Ra, (mul node:$Rn, node:$Rm))>;
// MLA: u=0; MLS: u=1. Same opcode (0b10010), B/H/S element sizes only.
338 def MLAvvv_8B: NeonI_3VSame_Constraint_impl<"mla", ".8b", VPR64, v8i8,
339 0b0, 0b0, 0b00, 0b10010, Neon_mla>;
340 def MLAvvv_16B: NeonI_3VSame_Constraint_impl<"mla", ".16b", VPR128, v16i8,
341 0b1, 0b0, 0b00, 0b10010, Neon_mla>;
342 def MLAvvv_4H: NeonI_3VSame_Constraint_impl<"mla", ".4h", VPR64, v4i16,
343 0b0, 0b0, 0b01, 0b10010, Neon_mla>;
344 def MLAvvv_8H: NeonI_3VSame_Constraint_impl<"mla", ".8h", VPR128, v8i16,
345 0b1, 0b0, 0b01, 0b10010, Neon_mla>;
346 def MLAvvv_2S: NeonI_3VSame_Constraint_impl<"mla", ".2s", VPR64, v2i32,
347 0b0, 0b0, 0b10, 0b10010, Neon_mla>;
348 def MLAvvv_4S: NeonI_3VSame_Constraint_impl<"mla", ".4s", VPR128, v4i32,
349 0b1, 0b0, 0b10, 0b10010, Neon_mla>;
351 def MLSvvv_8B: NeonI_3VSame_Constraint_impl<"mls", ".8b", VPR64, v8i8,
352 0b0, 0b1, 0b00, 0b10010, Neon_mls>;
353 def MLSvvv_16B: NeonI_3VSame_Constraint_impl<"mls", ".16b", VPR128, v16i8,
354 0b1, 0b1, 0b00, 0b10010, Neon_mls>;
355 def MLSvvv_4H: NeonI_3VSame_Constraint_impl<"mls", ".4h", VPR64, v4i16,
356 0b0, 0b1, 0b01, 0b10010, Neon_mls>;
357 def MLSvvv_8H: NeonI_3VSame_Constraint_impl<"mls", ".8h", VPR128, v8i16,
358 0b1, 0b1, 0b01, 0b10010, Neon_mls>;
359 def MLSvvv_2S: NeonI_3VSame_Constraint_impl<"mls", ".2s", VPR64, v2i32,
360 0b0, 0b1, 0b10, 0b10010, Neon_mls>;
361 def MLSvvv_4S: NeonI_3VSame_Constraint_impl<"mls", ".4s", VPR128, v4i32,
362 0b1, 0b1, 0b10, 0b10010, Neon_mls>;
364 // Vector Multiply-accumulate and Multiply-subtract (Floating Point)
// fmul_su restricts the multiply to single-use so the contraction into a
// fused op cannot duplicate work; only selected under UseFusedMAC.
366 def Neon_fmla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
367 (fadd node:$Ra, (fmul_su node:$Rn, node:$Rm))>;
369 def Neon_fmls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
370 (fsub node:$Ra, (fmul_su node:$Rn, node:$Rm))>;
// FMLA uses size bits 0b0x, FMLS 0b1x (the high size bit distinguishes
// add vs subtract forms in the FP three-same encoding space).
372 let Predicates = [HasNEON, UseFusedMAC] in {
373 def FMLAvvv_2S: NeonI_3VSame_Constraint_impl<"fmla", ".2s", VPR64, v2f32,
374 0b0, 0b0, 0b00, 0b11001, Neon_fmla>;
375 def FMLAvvv_4S: NeonI_3VSame_Constraint_impl<"fmla", ".4s", VPR128, v4f32,
376 0b1, 0b0, 0b00, 0b11001, Neon_fmla>;
377 def FMLAvvv_2D: NeonI_3VSame_Constraint_impl<"fmla", ".2d", VPR128, v2f64,
378 0b1, 0b0, 0b01, 0b11001, Neon_fmla>;
380 def FMLSvvv_2S: NeonI_3VSame_Constraint_impl<"fmls", ".2s", VPR64, v2f32,
381 0b0, 0b0, 0b10, 0b11001, Neon_fmls>;
382 def FMLSvvv_4S: NeonI_3VSame_Constraint_impl<"fmls", ".4s", VPR128, v4f32,
383 0b1, 0b0, 0b10, 0b11001, Neon_fmls>;
384 def FMLSvvv_2D: NeonI_3VSame_Constraint_impl<"fmls", ".2d", VPR128, v2f64,
385 0b1, 0b0, 0b11, 0b11001, Neon_fmls>;
388 // We're also allowed to match the fma instruction regardless of compile
// options: an explicit llvm.fma intrinsic always maps to FMLA/FMLS.
// Note the operand order swap: fma(Rn, Rm, Ra) -> FMLA Ra, Rn, Rm.
390 def : Pat<(v2f32 (fma VPR64:$Rn, VPR64:$Rm, VPR64:$Ra)),
391 (FMLAvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>;
392 def : Pat<(v4f32 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)),
393 (FMLAvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
394 def : Pat<(v2f64 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)),
395 (FMLAvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
// fma with a negated multiplicand selects the subtracting form.
397 def : Pat<(v2f32 (fma (fneg VPR64:$Rn), VPR64:$Rm, VPR64:$Ra)),
398 (FMLSvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>;
399 def : Pat<(v4f32 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)),
400 (FMLSvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
401 def : Pat<(v2f64 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)),
402 (FMLSvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
404 // Vector Divide (Floating-Point)
406 defm FDIVvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11111, "fdiv", fdiv,
407 v2f32, v4f32, v2f64, 0>;
409 // Vector Bitwise Operations
411 // Vector Bitwise AND
// Logical ops are untyped bit operations, so only the byte-arranged
// B-sizes multiclass is used; other element types are handled by the
// Neon_bitwise2V_patterns below.
413 defm ANDvvv : NeonI_3VSame_B_sizes<0b0, 0b00, 0b00011, "and", and, and, 1>;
415 // Vector Bitwise Exclusive OR
417 defm EORvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b00011, "eor", xor, xor, 1>;
421 defm ORRvvv : NeonI_3VSame_B_sizes<0b0, 0b10, 0b00011, "orr", or, or, 1>;
423 // ORR disassembled as MOV if Vn==Vm
425 // Vector Move - register
426 // Alias for ORR if Vn=Vm.
427 // FIXME: This is actually the preferred syntax but TableGen can't deal with
428 // custom printing of aliases.
429 def : NeonInstAlias<"mov $Rd.8b, $Rn.8b",
430 (ORRvvv_8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rn), 0>;
431 def : NeonInstAlias<"mov $Rd.16b, $Rn.16b",
432 (ORRvvv_16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rn), 0>;
434 // The MOVI instruction takes two immediate operands. The first is the
435 // immediate encoding, while the second is the cmode. A cmode of 14, or
436 // 0b1110, produces a MOVI operation, rather than a MVNI, ORR, or BIC.
437 def Neon_AllZero : PatFrag<(ops), (Neon_movi (i32 0), (i32 14))>;
438 def Neon_AllOne : PatFrag<(ops), (Neon_movi (i32 255), (i32 14))>;
// Bitwise NOT expressed as XOR with an all-ones vector immediate.
440 def Neon_not8B : PatFrag<(ops node:$in),
441 (xor node:$in, (bitconvert (v8i8 Neon_AllOne)))>;
442 def Neon_not16B : PatFrag<(ops node:$in),
443 (xor node:$in, (bitconvert (v16i8 Neon_AllOne)))>;
// ORN = Rn | ~Rm ; BIC = Rn & ~Rm (second operand is inverted).
445 def Neon_orn8B : PatFrag<(ops node:$Rn, node:$Rm),
446 (or node:$Rn, (Neon_not8B node:$Rm))>;
448 def Neon_orn16B : PatFrag<(ops node:$Rn, node:$Rm),
449 (or node:$Rn, (Neon_not16B node:$Rm))>;
451 def Neon_bic8B : PatFrag<(ops node:$Rn, node:$Rm),
452 (and node:$Rn, (Neon_not8B node:$Rm))>;
454 def Neon_bic16B : PatFrag<(ops node:$Rn, node:$Rm),
455 (and node:$Rn, (Neon_not16B node:$Rm))>;
458 // Vector Bitwise OR NOT - register
460 defm ORNvvv : NeonI_3VSame_B_sizes<0b0, 0b11, 0b00011, "orn",
461 Neon_orn8B, Neon_orn16B, 0>;
463 // Vector Bitwise Bit Clear (AND NOT) - register
465 defm BICvvv : NeonI_3VSame_B_sizes<0b0, 0b01, 0b00011, "bic",
466 Neon_bic8B, Neon_bic16B, 0>;
// Re-type patterns: the bitwise instructions are defined only on v8i8/v16i8,
// so map the same operations on the other integer vector types onto the
// byte-typed instructions (bit patterns are identical).
// NOTE(review): the "Instruction INST8B," parameter line (original line 470)
// and the multiclass closing brace are missing from this dump.
468 multiclass Neon_bitwise2V_patterns<SDPatternOperator opnode8B,
469 SDPatternOperator opnode16B,
471 Instruction INST16B> {
472 def : Pat<(v2i32 (opnode8B VPR64:$Rn, VPR64:$Rm)),
473 (INST8B VPR64:$Rn, VPR64:$Rm)>;
474 def : Pat<(v4i16 (opnode8B VPR64:$Rn, VPR64:$Rm)),
475 (INST8B VPR64:$Rn, VPR64:$Rm)>;
476 def : Pat<(v1i64 (opnode8B VPR64:$Rn, VPR64:$Rm)),
477 (INST8B VPR64:$Rn, VPR64:$Rm)>;
478 def : Pat<(v4i32 (opnode16B VPR128:$Rn, VPR128:$Rm)),
479 (INST16B VPR128:$Rn, VPR128:$Rm)>;
480 def : Pat<(v8i16 (opnode16B VPR128:$Rn, VPR128:$Rm)),
481 (INST16B VPR128:$Rn, VPR128:$Rm)>;
482 def : Pat<(v2i64 (opnode16B VPR128:$Rn, VPR128:$Rm)),
483 (INST16B VPR128:$Rn, VPR128:$Rm)>;
486 // Additional patterns for bitwise instructions AND, EOR, ORR, BIC, ORN
487 defm : Neon_bitwise2V_patterns<and, and, ANDvvv_8B, ANDvvv_16B>;
488 defm : Neon_bitwise2V_patterns<or, or, ORRvvv_8B, ORRvvv_16B>;
489 defm : Neon_bitwise2V_patterns<xor, xor, EORvvv_8B, EORvvv_16B>;
490 defm : Neon_bitwise2V_patterns<Neon_bic8B, Neon_bic16B, BICvvv_8B, BICvvv_16B>;
491 defm : Neon_bitwise2V_patterns<Neon_orn8B, Neon_orn16B, ORNvvv_8B, ORNvvv_16B>;
493 // Vector Bitwise Select
// BSL reads all three registers; $src is tied to $Rd by the constraint class.
494 def BSLvvv_8B : NeonI_3VSame_Constraint_impl<"bsl", ".8b", VPR64, v8i8,
495 0b0, 0b1, 0b01, 0b00011, vselect>;
497 def BSLvvv_16B : NeonI_3VSame_Constraint_impl<"bsl", ".16b", VPR128, v16i8,
498 0b1, 0b1, 0b01, 0b00011, vselect>;
// Patterns mapping every vector type onto the byte-typed BSL instructions:
// (1) vselect with any element type, (2) the raw (Rn&Rd)|(Rm&~Rd) expansion,
// and (3) the llvm.arm.neon.vbsl intrinsics.
// NOTE(review): the "Instruction INST8B," parameter line (original line 501)
// and the closing brace(s) are missing from this dump; line numbers also skip
// 528 and 554 between the three pattern groups.
500 multiclass Neon_bitwise3V_patterns<SDPatternOperator opnode,
502 Instruction INST16B> {
503 // Disassociate type from instruction definition
504 def : Pat<(v8i8 (opnode (v8i8 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
505 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
506 def : Pat<(v2i32 (opnode (v2i32 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
507 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
508 def : Pat<(v2f32 (opnode (v2i32 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
509 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
510 def : Pat<(v4i16 (opnode (v4i16 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
511 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
512 def : Pat<(v1i64 (opnode (v1i64 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
513 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
514 def : Pat<(v1f64 (opnode (v1i64 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
515 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
516 def : Pat<(v16i8 (opnode (v16i8 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
517 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
518 def : Pat<(v4i32 (opnode (v4i32 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
519 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
520 def : Pat<(v8i16 (opnode (v8i16 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
521 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
522 def : Pat<(v2i64 (opnode (v2i64 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
523 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
524 def : Pat<(v2f64 (opnode (v2i64 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
525 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
526 def : Pat<(v4f32 (opnode (v4i32 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
527 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
529 // Allow to match BSL instruction pattern with non-constant operand
// Match the expanded select (Rn & Rd) | (Rm & ~Rd); Rd is the mask.
530 def : Pat<(v8i8 (or (and VPR64:$Rn, VPR64:$Rd),
531 (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
532 (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
533 def : Pat<(v4i16 (or (and VPR64:$Rn, VPR64:$Rd),
534 (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
535 (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
536 def : Pat<(v2i32 (or (and VPR64:$Rn, VPR64:$Rd),
537 (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
538 (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
539 def : Pat<(v1i64 (or (and VPR64:$Rn, VPR64:$Rd),
540 (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
541 (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
542 def : Pat<(v16i8 (or (and VPR128:$Rn, VPR128:$Rd),
543 (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
544 (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
545 def : Pat<(v8i16 (or (and VPR128:$Rn, VPR128:$Rd),
546 (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
547 (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
548 def : Pat<(v4i32 (or (and VPR128:$Rn, VPR128:$Rd),
549 (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
550 (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
551 def : Pat<(v2i64 (or (and VPR128:$Rn, VPR128:$Rd),
552 (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
553 (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
555 // Allow to match llvm.arm.* intrinsics.
556 def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 VPR64:$src),
557 (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))),
558 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
559 def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 VPR64:$src),
560 (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))),
561 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
562 def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 VPR64:$src),
563 (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))),
564 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
565 def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 VPR64:$src),
566 (v1i64 VPR64:$Rn), (v1i64 VPR64:$Rm))),
567 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
568 def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 VPR64:$src),
569 (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))),
570 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
571 def : Pat<(v1f64 (int_arm_neon_vbsl (v1f64 VPR64:$src),
572 (v1f64 VPR64:$Rn), (v1f64 VPR64:$Rm))),
573 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
574 def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 VPR128:$src),
575 (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))),
576 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
577 def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 VPR128:$src),
578 (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))),
579 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
580 def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 VPR128:$src),
581 (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))),
582 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
583 def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 VPR128:$src),
584 (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))),
585 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
586 def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 VPR128:$src),
587 (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))),
588 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
589 def : Pat<(v2f64 (int_arm_neon_vbsl (v2f64 VPR128:$src),
590 (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))),
591 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
594 // Additional patterns for bitwise instruction BSL
595 defm: Neon_bitwise3V_patterns<vselect, BSLvvv_8B, BSLvvv_16B>;
// Never-matching pattern operator: BIT/BIF share BSL's instruction shape but
// are assembler-only here, so give them a predicate that always rejects.
// (void)N suppresses the unused-variable warning in the generated C++.
597 def Neon_NoBSLop : PatFrag<(ops node:$src, node:$Rn, node:$Rm),
598 (vselect node:$src, node:$Rn, node:$Rm),
599 [{ (void)N; return false; }]>;
601 // Vector Bitwise Insert if True
603 def BITvvv_8B : NeonI_3VSame_Constraint_impl<"bit", ".8b", VPR64, v8i8,
604 0b0, 0b1, 0b10, 0b00011, Neon_NoBSLop>;
605 def BITvvv_16B : NeonI_3VSame_Constraint_impl<"bit", ".16b", VPR128, v16i8,
606 0b1, 0b1, 0b10, 0b00011, Neon_NoBSLop>;
608 // Vector Bitwise Insert if False
610 def BIFvvv_8B : NeonI_3VSame_Constraint_impl<"bif", ".8b", VPR64, v8i8,
611 0b0, 0b1, 0b11, 0b00011, Neon_NoBSLop>;
612 def BIFvvv_16B : NeonI_3VSame_Constraint_impl<"bif", ".16b", VPR128, v16i8,
613 0b1, 0b1, 0b11, 0b00011, Neon_NoBSLop>;
615 // Vector Absolute Difference and Accumulate (Signed, Unsigned)
// ABA = accumulate + absolute-difference; fold add-of-vabd into one op.
617 def Neon_uaba : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
618 (add node:$Ra, (int_arm_neon_vabdu node:$Rn, node:$Rm))>;
619 def Neon_saba : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
620 (add node:$Ra, (int_arm_neon_vabds node:$Rn, node:$Rm))>;
622 // Vector Absolute Difference and Accumulate (Unsigned)
// UABA: u=1; SABA: u=0. Same opcode 0b01111, B/H/S element sizes only.
623 def UABAvvv_8B : NeonI_3VSame_Constraint_impl<"uaba", ".8b", VPR64, v8i8,
624 0b0, 0b1, 0b00, 0b01111, Neon_uaba>;
625 def UABAvvv_16B : NeonI_3VSame_Constraint_impl<"uaba", ".16b", VPR128, v16i8,
626 0b1, 0b1, 0b00, 0b01111, Neon_uaba>;
627 def UABAvvv_4H : NeonI_3VSame_Constraint_impl<"uaba", ".4h", VPR64, v4i16,
628 0b0, 0b1, 0b01, 0b01111, Neon_uaba>;
629 def UABAvvv_8H : NeonI_3VSame_Constraint_impl<"uaba", ".8h", VPR128, v8i16,
630 0b1, 0b1, 0b01, 0b01111, Neon_uaba>;
631 def UABAvvv_2S : NeonI_3VSame_Constraint_impl<"uaba", ".2s", VPR64, v2i32,
632 0b0, 0b1, 0b10, 0b01111, Neon_uaba>;
633 def UABAvvv_4S : NeonI_3VSame_Constraint_impl<"uaba", ".4s", VPR128, v4i32,
634 0b1, 0b1, 0b10, 0b01111, Neon_uaba>;
636 // Vector Absolute Difference and Accumulate (Signed)
637 def SABAvvv_8B : NeonI_3VSame_Constraint_impl<"saba", ".8b", VPR64, v8i8,
638 0b0, 0b0, 0b00, 0b01111, Neon_saba>;
639 def SABAvvv_16B : NeonI_3VSame_Constraint_impl<"saba", ".16b", VPR128, v16i8,
640 0b1, 0b0, 0b00, 0b01111, Neon_saba>;
641 def SABAvvv_4H : NeonI_3VSame_Constraint_impl<"saba", ".4h", VPR64, v4i16,
642 0b0, 0b0, 0b01, 0b01111, Neon_saba>;
643 def SABAvvv_8H : NeonI_3VSame_Constraint_impl<"saba", ".8h", VPR128, v8i16,
644 0b1, 0b0, 0b01, 0b01111, Neon_saba>;
645 def SABAvvv_2S : NeonI_3VSame_Constraint_impl<"saba", ".2s", VPR64, v2i32,
646 0b0, 0b0, 0b10, 0b01111, Neon_saba>;
647 def SABAvvv_4S : NeonI_3VSame_Constraint_impl<"saba", ".4s", VPR128, v4i32,
648 0b1, 0b0, 0b10, 0b01111, Neon_saba>;
651 // Vector Absolute Difference (Signed, Unsigned)
652 defm UABDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01110, "uabd", int_arm_neon_vabdu, 0>;
653 defm SABDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01110, "sabd", int_arm_neon_vabds, 0>;
655 // Vector Absolute Difference (Floating Point)
// Reuses the (integer-named) vabds intrinsic for the FP operand types — the
// SD multiclass supplies v2f32/v4f32/v2f64 operands.
656 defm FABDvvv: NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11010, "fabd",
657 int_arm_neon_vabds, v2f32, v4f32, v2f64, 0>;
659 // Vector Reciprocal Step (Floating Point)
// NOTE(review): original line 661 (presumably "int_arm_neon_vrecps,") is
// missing from this dump.
660 defm FRECPSvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11111, "frecps",
662 v2f32, v4f32, v2f64, 0>;
664 // Vector Reciprocal Square Root Step (Floating Point)
665 defm FRSQRTSvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11111, "frsqrts",
666 int_arm_neon_vrsqrts,
667 v2f32, v4f32, v2f64, 0>;
669 // Vector Comparisons
// Bind the generic Neon_cmp node to each concrete condition code so the
// compare instruction multiclasses below can pattern-match on it.
671 def Neon_cmeq : PatFrag<(ops node:$lhs, node:$rhs),
672 (Neon_cmp node:$lhs, node:$rhs, SETEQ)>;
673 def Neon_cmphs : PatFrag<(ops node:$lhs, node:$rhs),
674 (Neon_cmp node:$lhs, node:$rhs, SETUGE)>;
675 def Neon_cmge : PatFrag<(ops node:$lhs, node:$rhs),
676 (Neon_cmp node:$lhs, node:$rhs, SETGE)>;
677 def Neon_cmhi : PatFrag<(ops node:$lhs, node:$rhs),
678 (Neon_cmp node:$lhs, node:$rhs, SETUGT)>;
679 def Neon_cmgt : PatFrag<(ops node:$lhs, node:$rhs),
680 (Neon_cmp node:$lhs, node:$rhs, SETGT)>;
682 // NeonI_compare_aliases class: swaps register operands to implement
683 // comparison aliases, e.g., CMLE is alias for CMGE with operands reversed.
// Assembler-only alias (trailing 0b0 = not the printing form). Note $Rm and
// $Rn are deliberately swapped in the result instruction.
// NOTE(review): original line 687 (the "$Rm" # asmlane tail of the asm
// string) is missing from this dump.
684 class NeonI_compare_aliases<string asmop, string asmlane,
685 Instruction inst, RegisterOperand VPRC>
686 : NeonInstAlias<asmop # "\t$Rd" # asmlane #", $Rn" # asmlane #
688 (inst VPRC:$Rd, VPRC:$Rm, VPRC:$Rn), 0b0>;
690 // Vector Comparisons (Integer)
692 // Vector Compare Mask Equal (Integer)
// CMEQ is commutable even though the multiclass argument passes 0; the
// surrounding let overrides it for all variants.
693 let isCommutable =1 in {
694 defm CMEQvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b10001, "cmeq", Neon_cmeq, 0>;
697 // Vector Compare Mask Higher or Same (Unsigned Integer)
698 defm CMHSvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00111, "cmhs", Neon_cmphs, 0>;
700 // Vector Compare Mask Greater Than or Equal (Integer)
701 defm CMGEvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00111, "cmge", Neon_cmge, 0>;
703 // Vector Compare Mask Higher (Unsigned Integer)
704 defm CMHIvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00110, "cmhi", Neon_cmhi, 0>;
706 // Vector Compare Mask Greater Than (Integer)
707 defm CMGTvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00110, "cmgt", Neon_cmgt, 0>;
709 // Vector Compare Mask Bitwise Test (Integer)
710 defm CMTSTvvv: NeonI_3VSame_BHSD_sizes<0b0, 0b10001, "cmtst", Neon_tst, 0>;
712 // Vector Compare Mask Less or Same (Unsigned Integer)
713 // CMLS is alias for CMHS with operands reversed.
714 def CMLSvvv_8B : NeonI_compare_aliases<"cmls", ".8b", CMHSvvv_8B, VPR64>;
715 def CMLSvvv_16B : NeonI_compare_aliases<"cmls", ".16b", CMHSvvv_16B, VPR128>;
716 def CMLSvvv_4H : NeonI_compare_aliases<"cmls", ".4h", CMHSvvv_4H, VPR64>;
717 def CMLSvvv_8H : NeonI_compare_aliases<"cmls", ".8h", CMHSvvv_8H, VPR128>;
718 def CMLSvvv_2S : NeonI_compare_aliases<"cmls", ".2s", CMHSvvv_2S, VPR64>;
719 def CMLSvvv_4S : NeonI_compare_aliases<"cmls", ".4s", CMHSvvv_4S, VPR128>;
720 def CMLSvvv_2D : NeonI_compare_aliases<"cmls", ".2d", CMHSvvv_2D, VPR128>;
722 // Vector Compare Mask Less Than or Equal (Integer)
723 // CMLE is alias for CMGE with operands reversed.
724 def CMLEvvv_8B : NeonI_compare_aliases<"cmle", ".8b", CMGEvvv_8B, VPR64>;
725 def CMLEvvv_16B : NeonI_compare_aliases<"cmle", ".16b", CMGEvvv_16B, VPR128>;
726 def CMLEvvv_4H : NeonI_compare_aliases<"cmle", ".4h", CMGEvvv_4H, VPR64>;
727 def CMLEvvv_8H : NeonI_compare_aliases<"cmle", ".8h", CMGEvvv_8H, VPR128>;
728 def CMLEvvv_2S : NeonI_compare_aliases<"cmle", ".2s", CMGEvvv_2S, VPR64>;
729 def CMLEvvv_4S : NeonI_compare_aliases<"cmle", ".4s", CMGEvvv_4S, VPR128>;
730 def CMLEvvv_2D : NeonI_compare_aliases<"cmle", ".2d", CMGEvvv_2D, VPR128>;
732 // Vector Compare Mask Lower (Unsigned Integer)
733 // CMLO is alias for CMHI with operands reversed.
734 def CMLOvvv_8B : NeonI_compare_aliases<"cmlo", ".8b", CMHIvvv_8B, VPR64>;
735 def CMLOvvv_16B : NeonI_compare_aliases<"cmlo", ".16b", CMHIvvv_16B, VPR128>;
736 def CMLOvvv_4H : NeonI_compare_aliases<"cmlo", ".4h", CMHIvvv_4H, VPR64>;
737 def CMLOvvv_8H : NeonI_compare_aliases<"cmlo", ".8h", CMHIvvv_8H, VPR128>;
738 def CMLOvvv_2S : NeonI_compare_aliases<"cmlo", ".2s", CMHIvvv_2S, VPR64>;
739 def CMLOvvv_4S : NeonI_compare_aliases<"cmlo", ".4s", CMHIvvv_4S, VPR128>;
740 def CMLOvvv_2D : NeonI_compare_aliases<"cmlo", ".2d", CMHIvvv_2D, VPR128>;
742 // Vector Compare Mask Less Than (Integer)
743 // CMLT is alias for CMGT with operands reversed.
744 def CMLTvvv_8B : NeonI_compare_aliases<"cmlt", ".8b", CMGTvvv_8B, VPR64>;
745 def CMLTvvv_16B : NeonI_compare_aliases<"cmlt", ".16b", CMGTvvv_16B, VPR128>;
746 def CMLTvvv_4H : NeonI_compare_aliases<"cmlt", ".4h", CMGTvvv_4H, VPR64>;
747 def CMLTvvv_8H : NeonI_compare_aliases<"cmlt", ".8h", CMGTvvv_8H, VPR128>;
748 def CMLTvvv_2S : NeonI_compare_aliases<"cmlt", ".2s", CMGTvvv_2S, VPR64>;
749 def CMLTvvv_4S : NeonI_compare_aliases<"cmlt", ".4s", CMGTvvv_4S, VPR128>;
750 def CMLTvvv_2D : NeonI_compare_aliases<"cmlt", ".2d", CMGTvvv_2D, VPR128>;
// Immediate operand that matches only the value 0.  It is used as the
// right-hand operand of the compare-against-zero instructions (CMxx/FCMxx
// ... #0) defined below via NeonI_cmpz_sizes / NeonI_fpcmpz_sizes.
753 def neon_uimm0_asmoperand : AsmOperandClass
756 let PredicateMethod = "isUImm<0>";
757 let RenderMethod = "addImmOperands";
760 def neon_uimm0 : Operand<i32>, ImmLeaf<i32, [{return Imm == 0;}]> {
761 let ParserMatchClass = neon_uimm0_asmoperand;
762 let PrintMethod = "printNeonUImm0Operand";
// Multiclass generating the integer compare-against-zero instructions
// (asmop Vd.<T>, Vn.<T>, #0) for every integer arrangement:
// 8B/16B/4H/8H/2S/4S/2D.  The $Imm operand is constrained to zero by
// neon_uimm0, and each variant selects a Neon_cmpz node with condition CC.
766 multiclass NeonI_cmpz_sizes<bit u, bits<5> opcode, string asmop, CondCode CC>
768 def _8B : NeonI_2VMisc<0b0, u, 0b00, opcode,
769 (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
770 asmop # "\t$Rd.8b, $Rn.8b, $Imm",
771 [(set (v8i8 VPR64:$Rd),
772 (v8i8 (Neon_cmpz (v8i8 VPR64:$Rn), (i32 imm:$Imm), CC)))],
775 def _16B : NeonI_2VMisc<0b1, u, 0b00, opcode,
776 (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
777 asmop # "\t$Rd.16b, $Rn.16b, $Imm",
778 [(set (v16i8 VPR128:$Rd),
779 (v16i8 (Neon_cmpz (v16i8 VPR128:$Rn), (i32 imm:$Imm), CC)))],
782 def _4H : NeonI_2VMisc<0b0, u, 0b01, opcode,
783 (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
784 asmop # "\t$Rd.4h, $Rn.4h, $Imm",
785 [(set (v4i16 VPR64:$Rd),
786 (v4i16 (Neon_cmpz (v4i16 VPR64:$Rn), (i32 imm:$Imm), CC)))],
789 def _8H : NeonI_2VMisc<0b1, u, 0b01, opcode,
790 (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
791 asmop # "\t$Rd.8h, $Rn.8h, $Imm",
792 [(set (v8i16 VPR128:$Rd),
793 (v8i16 (Neon_cmpz (v8i16 VPR128:$Rn), (i32 imm:$Imm), CC)))],
796 def _2S : NeonI_2VMisc<0b0, u, 0b10, opcode,
797 (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
798 asmop # "\t$Rd.2s, $Rn.2s, $Imm",
799 [(set (v2i32 VPR64:$Rd),
800 (v2i32 (Neon_cmpz (v2i32 VPR64:$Rn), (i32 imm:$Imm), CC)))],
// Note: there is no 64-bit "1D" variant; the 2D form below is 128-bit only
// (first template bit is 0b1).
803 def _4S : NeonI_2VMisc<0b1, u, 0b10, opcode,
804 (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
805 asmop # "\t$Rd.4s, $Rn.4s, $Imm",
806 [(set (v4i32 VPR128:$Rd),
807 (v4i32 (Neon_cmpz (v4i32 VPR128:$Rn), (i32 imm:$Imm), CC)))],
810 def _2D : NeonI_2VMisc<0b1, u, 0b11, opcode,
811 (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
812 asmop # "\t$Rd.2d, $Rn.2d, $Imm",
813 [(set (v2i64 VPR128:$Rd),
814 (v2i64 (Neon_cmpz (v2i64 VPR128:$Rn), (i32 imm:$Imm), CC)))],
// Integer compare-against-zero instructions, expanded per arrangement by
// NeonI_cmpz_sizes above.
818 // Vector Compare Mask Equal to Zero (Integer)
819 defm CMEQvvi : NeonI_cmpz_sizes<0b0, 0b01001, "cmeq", SETEQ>;
821 // Vector Compare Mask Greater Than or Equal to Zero (Signed Integer)
822 defm CMGEvvi : NeonI_cmpz_sizes<0b1, 0b01000, "cmge", SETGE>;
824 // Vector Compare Mask Greater Than Zero (Signed Integer)
825 defm CMGTvvi : NeonI_cmpz_sizes<0b0, 0b01000, "cmgt", SETGT>;
827 // Vector Compare Mask Less Than or Equal To Zero (Signed Integer)
828 defm CMLEvvi : NeonI_cmpz_sizes<0b1, 0b01001, "cmle", SETLE>;
830 // Vector Compare Mask Less Than Zero (Signed Integer)
831 defm CMLTvvi : NeonI_cmpz_sizes<0b0, 0b01010, "cmlt", SETLT>;
833 // Vector Comparisons (Floating Point)
// Results are integer masks (v2i32/v4i32/v2i64), hence the integer result
// types passed to NeonI_3VSame_SD_sizes below.
835 // Vector Compare Mask Equal (Floating Point)
836 let isCommutable =1 in {
837 defm FCMEQvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11100, "fcmeq", Neon_cmeq,
838 v2i32, v4i32, v2i64, 0>;
841 // Vector Compare Mask Greater Than Or Equal (Floating Point)
842 defm FCMGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11100, "fcmge", Neon_cmge,
843 v2i32, v4i32, v2i64, 0>;
845 // Vector Compare Mask Greater Than (Floating Point)
846 defm FCMGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11100, "fcmgt", Neon_cmgt,
847 v2i32, v4i32, v2i64, 0>;
849 // Vector Compare Mask Less Than Or Equal (Floating Point)
850 // FCMLE is an alias for FCMGE with the two source operands reversed.
851 def FCMLEvvv_2S : NeonI_compare_aliases<"fcmle", ".2s", FCMGEvvv_2S, VPR64>;
852 def FCMLEvvv_4S : NeonI_compare_aliases<"fcmle", ".4s", FCMGEvvv_4S, VPR128>;
853 def FCMLEvvv_2D : NeonI_compare_aliases<"fcmle", ".2d", FCMGEvvv_2D, VPR128>;
855 // Vector Compare Mask Less Than (Floating Point)
856 // FCMLT is an alias for FCMGT with the two source operands reversed.
857 def FCMLTvvv_2S : NeonI_compare_aliases<"fcmlt", ".2s", FCMGTvvv_2S, VPR64>;
858 def FCMLTvvv_4S : NeonI_compare_aliases<"fcmlt", ".4s", FCMGTvvv_4S, VPR128>;
859 def FCMLTvvv_2D : NeonI_compare_aliases<"fcmlt", ".2d", FCMGTvvv_2D, VPR128>;
// Operand that matches a zero comparison immediate: the parser method name
// ("ParseFPImm0AndImm0Operand") indicates both the floating-point spelling
// (#0.0) and the integer spelling (#0) are accepted.  Used as the RHS of the
// FP compare-against-zero instructions below.
861 def fpzero_izero_asmoperand : AsmOperandClass {
862 let Name = "FPZeroIZero";
863 let ParserMethod = "ParseFPImm0AndImm0Operand";
864 let DiagnosticType = "FPZero";
// f32 operand selected via the SelectFPZeroOperand complex pattern; printed
// and decoded with dedicated FP-zero methods.
867 def fpzz32 : Operand<f32>,
868 ComplexPattern<f32, 1, "SelectFPZeroOperand", [fpimm]> {
869 let ParserMatchClass = fpzero_izero_asmoperand;
870 let PrintMethod = "printFPZeroOperand";
871 let DecoderMethod = "DecodeFPZeroOperand";
// Multiclass generating the floating-point compare-against-zero
// instructions (asmop Vd.<T>, Vn.<T>, #0.0) for the 2S/4S/2D arrangements.
// The `size` bit combines with a fixed low bit ({size, 0b0} vs {size, 0b1})
// to distinguish single- from double-precision encodings; results are
// integer masks of the same lane width.
874 multiclass NeonI_fpcmpz_sizes<bit u, bit size, bits<5> opcode,
875 string asmop, CondCode CC>
877 def _2S : NeonI_2VMisc<0b0, u, {size, 0b0}, opcode,
878 (outs VPR64:$Rd), (ins VPR64:$Rn, fpzz32:$FPImm),
879 asmop # "\t$Rd.2s, $Rn.2s, $FPImm",
880 [(set (v2i32 VPR64:$Rd),
881 (v2i32 (Neon_cmpz (v2f32 VPR64:$Rn), (f32 fpzz32:$FPImm), CC)))],
884 def _4S : NeonI_2VMisc<0b1, u, {size, 0b0}, opcode,
885 (outs VPR128:$Rd), (ins VPR128:$Rn, fpzz32:$FPImm),
886 asmop # "\t$Rd.4s, $Rn.4s, $FPImm",
887 [(set (v4i32 VPR128:$Rd),
888 (v4i32 (Neon_cmpz (v4f32 VPR128:$Rn), (f32 fpzz32:$FPImm), CC)))],
891 def _2D : NeonI_2VMisc<0b1, u, {size, 0b1}, opcode,
892 (outs VPR128:$Rd), (ins VPR128:$Rn, fpzz32:$FPImm),
893 asmop # "\t$Rd.2d, $Rn.2d, $FPImm",
894 [(set (v2i64 VPR128:$Rd),
895 (v2i64 (Neon_cmpz (v2f64 VPR128:$Rn), (f32 fpzz32:$FPImm), CC)))],
// Floating-point compare-against-zero instructions, expanded per
// arrangement (2S/4S/2D) by NeonI_fpcmpz_sizes above.
899 // Vector Compare Mask Equal to Zero (Floating Point)
900 defm FCMEQvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01101, "fcmeq", SETEQ>;
902 // Vector Compare Mask Greater Than or Equal to Zero (Floating Point)
903 defm FCMGEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01100, "fcmge", SETGE>;
905 // Vector Compare Mask Greater Than Zero (Floating Point)
906 defm FCMGTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01100, "fcmgt", SETGT>;
908 // Vector Compare Mask Less Than or Equal To Zero (Floating Point)
909 defm FCMLEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01101, "fcmle", SETLE>;
911 // Vector Compare Mask Less Than Zero (Floating Point)
912 defm FCMLTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01110, "fcmlt", SETLT>;
914 // Vector Absolute Comparisons (Floating Point)
// As with the FCM* comparisons above, results are integer masks
// (v2i32/v4i32/v2i64).
916 // Vector Absolute Compare Mask Greater Than Or Equal (Floating Point)
917 defm FACGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11101, "facge",
919 v2i32, v4i32, v2i64, 0>;
921 // Vector Absolute Compare Mask Greater Than (Floating Point)
922 defm FACGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11101, "facgt",
924 v2i32, v4i32, v2i64, 0>;
926 // Vector Absolute Compare Mask Less Than Or Equal (Floating Point)
927 // FACLE is an alias for FACGE with the two source operands reversed.
928 def FACLEvvv_2S : NeonI_compare_aliases<"facle", ".2s", FACGEvvv_2S, VPR64>;
929 def FACLEvvv_4S : NeonI_compare_aliases<"facle", ".4s", FACGEvvv_4S, VPR128>;
930 def FACLEvvv_2D : NeonI_compare_aliases<"facle", ".2d", FACGEvvv_2D, VPR128>;
932 // Vector Absolute Compare Mask Less Than (Floating Point)
933 // FACLT is an alias for FACGT with the two source operands reversed.
934 def FACLTvvv_2S : NeonI_compare_aliases<"faclt", ".2s", FACGTvvv_2S, VPR64>;
935 def FACLTvvv_4S : NeonI_compare_aliases<"faclt", ".4s", FACGTvvv_4S, VPR128>;
936 def FACLTvvv_2D : NeonI_compare_aliases<"faclt", ".2d", FACGTvvv_2D, VPR128>;
// Three-register same-size integer arithmetic and register-controlled
// shifts.  The final template argument of each defm is the isCommutable
// flag (1 = commutative).  These map directly onto the shared ARM NEON
// intrinsics (int_arm_neon_*).
938 // Vector halving add (Integer Signed, Unsigned)
939 defm SHADDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00000, "shadd",
940 int_arm_neon_vhadds, 1>;
941 defm UHADDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00000, "uhadd",
942 int_arm_neon_vhaddu, 1>;
944 // Vector halving sub (Integer Signed, Unsigned)
945 defm SHSUBvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00100, "shsub",
946 int_arm_neon_vhsubs, 0>;
947 defm UHSUBvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00100, "uhsub",
948 int_arm_neon_vhsubu, 0>;
950 // Vector rounding halving add (Integer Signed, Unsigned)
951 defm SRHADDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00010, "srhadd",
952 int_arm_neon_vrhadds, 1>;
953 defm URHADDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00010, "urhadd",
954 int_arm_neon_vrhaddu, 1>;
956 // Vector Saturating add (Integer Signed, Unsigned)
957 defm SQADDvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00001, "sqadd",
958 int_arm_neon_vqadds, 1>;
959 defm UQADDvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00001, "uqadd",
960 int_arm_neon_vqaddu, 1>;
962 // Vector Saturating sub (Integer Signed, Unsigned)
963 defm SQSUBvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00101, "sqsub",
964 int_arm_neon_vqsubs, 1>;
965 defm UQSUBvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00101, "uqsub",
966 int_arm_neon_vqsubu, 1>;
968 // Vector Shift Left (Signed and Unsigned Integer)
969 defm SSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01000, "sshl",
970 int_arm_neon_vshifts, 1>;
971 defm USHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01000, "ushl",
972 int_arm_neon_vshiftu, 1>;
974 // Vector Saturating Shift Left (Signed and Unsigned Integer)
975 defm SQSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01001, "sqshl",
976 int_arm_neon_vqshifts, 1>;
977 defm UQSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01001, "uqshl",
978 int_arm_neon_vqshiftu, 1>;
980 // Vector Rounding Shift Left (Signed and Unsigned Integer)
981 defm SRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01010, "srshl",
982 int_arm_neon_vrshifts, 1>;
983 defm URSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01010, "urshl",
984 int_arm_neon_vrshiftu, 1>;
986 // Vector Saturating Rounding Shift Left (Signed and Unsigned Integer)
987 defm SQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01011, "sqrshl",
988 int_arm_neon_vqrshifts, 1>;
989 defm UQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01011, "uqrshl",
990 int_arm_neon_vqrshiftu, 1>;
// Element-wise minimum/maximum, integer and floating point.
992 // Vector Maximum (Signed and Unsigned Integer)
993 defm SMAXvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01100, "smax", int_arm_neon_vmaxs, 1>;
994 defm UMAXvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01100, "umax", int_arm_neon_vmaxu, 1>;
996 // Vector Minimum (Signed and Unsigned Integer)
997 defm SMINvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01101, "smin", int_arm_neon_vmins, 1>;
998 defm UMINvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01101, "umin", int_arm_neon_vminu, 1>;
1000 // Vector Maximum (Floating Point)
1001 defm FMAXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11110, "fmax",
1003 v2f32, v4f32, v2f64, 1>;
1005 // Vector Minimum (Floating Point)
1006 defm FMINvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11110, "fmin",
1008 v2f32, v4f32, v2f64, 1>;
1010 // Vector maxNum (Floating Point) - prefer a number over a quiet NaN
1011 defm FMAXNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11000, "fmaxnm",
1012 int_aarch64_neon_vmaxnm,
1013 v2f32, v4f32, v2f64, 1>;
1015 // Vector minNum (Floating Point) - prefer a number over a quiet NaN
1016 defm FMINNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11000, "fminnm",
1017 int_aarch64_neon_vminnm,
1018 v2f32, v4f32, v2f64, 1>;
// Pairwise operations: each result lane combines a pair of adjacent source
// lanes.
1020 // Vector Maximum Pairwise (Signed and Unsigned Integer)
1021 defm SMAXPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10100, "smaxp", int_arm_neon_vpmaxs, 1>;
1022 defm UMAXPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10100, "umaxp", int_arm_neon_vpmaxu, 1>;
1024 // Vector Minimum Pairwise (Signed and Unsigned Integer)
1025 defm SMINPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10101, "sminp", int_arm_neon_vpmins, 1>;
1026 defm UMINPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10101, "uminp", int_arm_neon_vpminu, 1>;
1028 // Vector Maximum Pairwise (Floating Point)
1029 defm FMAXPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11110, "fmaxp",
1030 int_arm_neon_vpmaxs, v2f32, v4f32, v2f64, 1>;
1032 // Vector Minimum Pairwise (Floating Point)
1033 defm FMINPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11110, "fminp",
1034 int_arm_neon_vpmins, v2f32, v4f32, v2f64, 1>;
1036 // Vector maxNum Pairwise (Floating Point) - prefer a number over a quiet NaN
1037 defm FMAXNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11000, "fmaxnmp",
1038 int_aarch64_neon_vpmaxnm,
1039 v2f32, v4f32, v2f64, 1>;
1041 // Vector minNum Pairwise (Floating Point) - prefer a number over a quiet NaN
1042 defm FMINNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11000, "fminnmp",
1043 int_aarch64_neon_vpminnm,
1044 v2f32, v4f32, v2f64, 1>;
1046 // Vector Addition Pairwise (Integer)
1047 defm ADDP : NeonI_3VSame_BHSD_sizes<0b0, 0b10111, "addp", int_arm_neon_vpadd, 1>;
1049 // Vector Addition Pairwise (Floating Point)
1050 defm FADDP : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11010, "faddp",
1052 v2f32, v4f32, v2f64, 1>;
1054 // Vector Saturating Doubling Multiply High
1055 defm SQDMULHvvv : NeonI_3VSame_HS_sizes<0b0, 0b10110, "sqdmulh",
1056 int_arm_neon_vqdmulh, 1>;
1058 // Vector Saturating Rounding Doubling Multiply High
1059 defm SQRDMULHvvv : NeonI_3VSame_HS_sizes<0b1, 0b10110, "sqrdmulh",
1060 int_arm_neon_vqrdmulh, 1>;
1062 // Vector Multiply Extended (Floating Point)
1063 defm FMULXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11011, "fmulx",
1064 int_aarch64_neon_vmulx,
1065 v2f32, v4f32, v2f64, 1>;
1067 // Patterns to match llvm.aarch64.* intrinsic for
1068 // ADDP, SMINP, UMINP, SMAXP, UMAXP having i32 as output
// Maps an across-vector reduction intrinsic on v2i32 onto the corresponding
// pairwise instruction applied to the vector and itself; the scalar result
// lives in the low lane of the pairwise result.
1069 class Neon_VectorPair_v2i32_pattern<SDPatternOperator opnode, Instruction INST>
1070 : Pat<(v1i32 (opnode (v2i32 VPR64:$Rn))),
1072 (v2i32 (INST (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rn))),
1075 def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_sminv, SMINPvvv_2S>;
1076 def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_uminv, UMINPvvv_2S>;
1077 def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_smaxv, SMAXPvvv_2S>;
1078 def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_umaxv, UMAXPvvv_2S>;
1079 def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_vaddv, ADDP_2S>;
1081 // Vector Immediate Instructions
// AsmOperandClass factory for the MOVI/MVNI shift operands; PREFIX selects
// the parser/predicate/render method family (LSL, MSL, LSLH).
1083 multiclass neon_mov_imm_shift_asmoperands<string PREFIX>
1085 def _asmoperand : AsmOperandClass
1087 let Name = "NeonMovImmShift" # PREFIX;
1088 let RenderMethod = "addNeonMovImmShift" # PREFIX # "Operands";
1089 let PredicateMethod = "isNeonMovImmShift" # PREFIX;
1093 // Definition of vector immediates shift operands
1095 // The selectable use-cases extract the shift operation
1096 // information from the OpCmode fields encoded in the immediate.
// SDNodeXForm: decode the OpCmode immediate and replace it with the raw
// shift amount; selection fails (returns SDValue()) when the cmode encodes
// no shift.
1097 def neon_mod_shift_imm_XFORM : SDNodeXForm<imm, [{
1098 uint64_t OpCmode = N->getZExtValue();
1100 unsigned ShiftOnesIn;
1102 A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn);
1103 if (!HasShift) return SDValue();
1104 return CurDAG->getTargetConstant(ShiftImm, MVT::i32);
1107 // Vector immediates shift operands which accept LSL and MSL
1108 // shift operators with shift value in the range of 0, 8, 16, 24 (LSL),
1109 // or 0, 8 (LSLH) or 8, 16 (MSL).
1110 defm neon_mov_imm_LSL : neon_mov_imm_shift_asmoperands<"LSL">;
1111 defm neon_mov_imm_MSL : neon_mov_imm_shift_asmoperands<"MSL">;
1112 // LSLH restricts the shift amount to 0 or 8 out of 0, 8, 16, 24
1113 defm neon_mov_imm_LSLH : neon_mov_imm_shift_asmoperands<"LSLH">;
// Operand factory pairing each asmoperand class above with an ImmLeaf
// predicate (`pred`) over the decoded cmode.  The LSL and MSL variants
// differ only in whether the decoded shift is a shift-zeros (LSL,
// !ShiftOnesIn) or shift-ones (MSL, ShiftOnesIn) form; LSLH reuses the LSL
// printer/decoder with ISHALF = "true".
1115 multiclass neon_mov_imm_shift_operands<string PREFIX,
1116 string HALF, string ISHALF, code pred>
1118 def _operand : Operand<i32>, ImmLeaf<i32, pred, neon_mod_shift_imm_XFORM>
1121 "printNeonMovImmShiftOperand<A64SE::" # PREFIX # ", " # ISHALF # ">";
1123 "DecodeNeonMovImmShiftOperand<A64SE::" # PREFIX # ", " # ISHALF # ">";
1124 let ParserMatchClass =
1125 !cast<AsmOperandClass>("neon_mov_imm_" # PREFIX # HALF # "_asmoperand");
1129 defm neon_mov_imm_LSL : neon_mov_imm_shift_operands<"LSL", "", "false", [{
1131 unsigned ShiftOnesIn;
1133 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1134 return (HasShift && !ShiftOnesIn);
1137 defm neon_mov_imm_MSL : neon_mov_imm_shift_operands<"MSL", "", "false", [{
1139 unsigned ShiftOnesIn;
1141 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1142 return (HasShift && ShiftOnesIn);
1145 defm neon_mov_imm_LSLH : neon_mov_imm_shift_operands<"LSL", "H", "true", [{
1147 unsigned ShiftOnesIn;
1149 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1150 return (HasShift && !ShiftOnesIn);
// Plain unsigned-immediate operand classes of various widths used by the
// modified-immediate instructions below.
1153 def neon_uimm1_asmoperand : AsmOperandClass
1156 let PredicateMethod = "isUImm<1>";
1157 let RenderMethod = "addImmOperands";
1160 def neon_uimm2_asmoperand : AsmOperandClass
1163 let PredicateMethod = "isUImm<2>";
1164 let RenderMethod = "addImmOperands";
1167 def neon_uimm8_asmoperand : AsmOperandClass
1170 let PredicateMethod = "isUImm<8>";
1171 let RenderMethod = "addImmOperands";
// neon_uimm8's ImmLeaf accepts any value at the pattern level; range
// checking is done by the asm operand class.
1174 def neon_uimm8 : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
1175 let ParserMatchClass = neon_uimm8_asmoperand;
1176 let PrintMethod = "printUImmHexOperand";
1179 def neon_uimm64_mask_asmoperand : AsmOperandClass
1181 let Name = "NeonUImm64Mask";
1182 let PredicateMethod = "isNeonUImm64Mask";
1183 let RenderMethod = "addNeonUImm64MaskOperands";
1186 // MCOperand for a 64-bit bytemask in which each byte is either
1187 // 0x00 or 0xff; it is encoded as an unsigned 8-bit value.
1188 def neon_uimm64_mask : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
1189 let ParserMatchClass = neon_uimm64_mask_asmoperand;
1190 let PrintMethod = "printNeonUImm64MaskOperand";
// Multiclass for MOVI/MVNI with an LSL-shifted 8-bit immediate.
// Per-word variants (2S/4S) allow shifts of 0/8/16/24; per-halfword
// variants (4H/8H) allow 0/8 via the LSLH operand.  The cmode field is
// derived from the shift operand $Simm as shown in each `let cmode` line.
1193 multiclass NeonI_mov_imm_lsl_sizes<string asmop, bit op,
1194 SDPatternOperator opnode>
1196 // shift zeros, per word
1197 def _2S : NeonI_1VModImm<0b0, op,
1199 (ins neon_uimm8:$Imm,
1200 neon_mov_imm_LSL_operand:$Simm),
1201 !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"),
1202 [(set (v2i32 VPR64:$Rd),
1203 (v2i32 (opnode (timm:$Imm),
1204 (neon_mov_imm_LSL_operand:$Simm))))],
1207 let cmode = {0b0, Simm{1}, Simm{0}, 0b0};
1210 def _4S : NeonI_1VModImm<0b1, op,
1212 (ins neon_uimm8:$Imm,
1213 neon_mov_imm_LSL_operand:$Simm),
1214 !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"),
1215 [(set (v4i32 VPR128:$Rd),
1216 (v4i32 (opnode (timm:$Imm),
1217 (neon_mov_imm_LSL_operand:$Simm))))],
1220 let cmode = {0b0, Simm{1}, Simm{0}, 0b0};
1223 // shift zeros, per halfword
1224 def _4H : NeonI_1VModImm<0b0, op,
1226 (ins neon_uimm8:$Imm,
1227 neon_mov_imm_LSLH_operand:$Simm),
1228 !strconcat(asmop, "\t$Rd.4h, $Imm$Simm"),
1229 [(set (v4i16 VPR64:$Rd),
1230 (v4i16 (opnode (timm:$Imm),
1231 (neon_mov_imm_LSLH_operand:$Simm))))],
1234 let cmode = {0b1, 0b0, Simm, 0b0};
1237 def _8H : NeonI_1VModImm<0b1, op,
1239 (ins neon_uimm8:$Imm,
1240 neon_mov_imm_LSLH_operand:$Simm),
1241 !strconcat(asmop, "\t$Rd.8h, $Imm$Simm"),
1242 [(set (v8i16 VPR128:$Rd),
1243 (v8i16 (opnode (timm:$Imm),
1244 (neon_mov_imm_LSLH_operand:$Simm))))],
1247 let cmode = {0b1, 0b0, Simm, 0b0};
// Multiclass for BIC/ORR with an LSL-shifted 8-bit immediate.  These are
// read-modify-write: $src is tied to $Rd via the Constraints below, and the
// DAG pattern combines the existing register value with the expanded
// immediate through `opnode` (and/or).  cmode differs from the plain
// MOVI/MVNI forms only in its low bit.
1251 multiclass NeonI_mov_imm_with_constraint_lsl_sizes<string asmop, bit op,
1252 SDPatternOperator opnode,
1253 SDPatternOperator neonopnode>
1255 let Constraints = "$src = $Rd" in {
1256 // shift zeros, per word
1257 def _2S : NeonI_1VModImm<0b0, op,
1259 (ins VPR64:$src, neon_uimm8:$Imm,
1260 neon_mov_imm_LSL_operand:$Simm),
1261 !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"),
1262 [(set (v2i32 VPR64:$Rd),
1263 (v2i32 (opnode (v2i32 VPR64:$src),
1264 (v2i32 (neonopnode timm:$Imm,
1265 neon_mov_imm_LSL_operand:$Simm)))))],
1268 let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
1271 def _4S : NeonI_1VModImm<0b1, op,
1273 (ins VPR128:$src, neon_uimm8:$Imm,
1274 neon_mov_imm_LSL_operand:$Simm),
1275 !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"),
1276 [(set (v4i32 VPR128:$Rd),
1277 (v4i32 (opnode (v4i32 VPR128:$src),
1278 (v4i32 (neonopnode timm:$Imm,
1279 neon_mov_imm_LSL_operand:$Simm)))))],
1282 let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
1285 // shift zeros, per halfword
1286 def _4H : NeonI_1VModImm<0b0, op,
1288 (ins VPR64:$src, neon_uimm8:$Imm,
1289 neon_mov_imm_LSLH_operand:$Simm),
1290 !strconcat(asmop, "\t$Rd.4h, $Imm$Simm"),
1291 [(set (v4i16 VPR64:$Rd),
1292 (v4i16 (opnode (v4i16 VPR64:$src),
1293 (v4i16 (neonopnode timm:$Imm,
1294 neon_mov_imm_LSL_operand:$Simm)))))],
1297 let cmode = {0b1, 0b0, Simm, 0b1};
1300 def _8H : NeonI_1VModImm<0b1, op,
1302 (ins VPR128:$src, neon_uimm8:$Imm,
1303 neon_mov_imm_LSLH_operand:$Simm),
1304 !strconcat(asmop, "\t$Rd.8h, $Imm$Simm"),
1305 [(set (v8i16 VPR128:$Rd),
1306 (v8i16 (opnode (v8i16 VPR128:$src),
1307 (v8i16 (neonopnode timm:$Imm,
1308 neon_mov_imm_LSL_operand:$Simm)))))],
1311 let cmode = {0b1, 0b0, Simm, 0b1};
// Multiclass for MOVI/MVNI with an MSL ("shift ones") immediate: the
// low bits vacated by the shift are filled with ones rather than zeros.
// MSL only exists for the per-word arrangements (2S/4S); the single MSL
// shift-select bit goes into cmode bit 0.
1316 multiclass NeonI_mov_imm_msl_sizes<string asmop, bit op,
1317 SDPatternOperator opnode>
1319 // shift ones, per word
1320 def _2S : NeonI_1VModImm<0b0, op,
1322 (ins neon_uimm8:$Imm,
1323 neon_mov_imm_MSL_operand:$Simm),
1324 !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"),
1325 [(set (v2i32 VPR64:$Rd),
1326 (v2i32 (opnode (timm:$Imm),
1327 (neon_mov_imm_MSL_operand:$Simm))))],
1330 let cmode = {0b1, 0b1, 0b0, Simm};
1333 def _4S : NeonI_1VModImm<0b1, op,
1335 (ins neon_uimm8:$Imm,
1336 neon_mov_imm_MSL_operand:$Simm),
1337 !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"),
1338 [(set (v4i32 VPR128:$Rd),
1339 (v4i32 (opnode (timm:$Imm),
1340 (neon_mov_imm_MSL_operand:$Simm))))],
1343 let cmode = {0b1, 0b1, 0b0, Simm};
// Instantiations of the LSL-shifted modified-immediate multiclasses.
// All are rematerializable: they depend only on their immediate operands.
1347 // Vector Move Immediate Shifted
1348 let isReMaterializable = 1 in {
1349 defm MOVIvi_lsl : NeonI_mov_imm_lsl_sizes<"movi", 0b0, Neon_movi>;
1352 // Vector Move Inverted Immediate Shifted
1353 let isReMaterializable = 1 in {
1354 defm MVNIvi_lsl : NeonI_mov_imm_lsl_sizes<"mvni", 0b1, Neon_mvni>;
1357 // Vector Bitwise Bit Clear (AND NOT) - immediate
1358 let isReMaterializable = 1 in {
1359 defm BICvi_lsl : NeonI_mov_imm_with_constraint_lsl_sizes<"bic", 0b1,
1363 // Vector Bitwise OR - immediate
1365 let isReMaterializable = 1 in {
1366 defm ORRvi_lsl : NeonI_mov_imm_with_constraint_lsl_sizes<"orr", 0b0,
1370 // Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immediate
1371 // LowerBUILD_VECTOR favors lowering MOVI over MVNI.
1372 // BIC immediate instruction selection requires additional patterns to
1373 // transform Neon_movi operands into BIC immediate operands.
// SDNodeXForm that flips the encoded LSLH shift amount (0 <-> 1), so that
// (and A, movi(0xff, shift)) can be matched as BIC with the complementary
// shift.
1375 def neon_mov_imm_LSLH_transform_XFORM : SDNodeXForm<imm, [{
1376 uint64_t OpCmode = N->getZExtValue();
1378 unsigned ShiftOnesIn;
1379 (void)A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn);
1380 // LSLH restricts shift amount to 0, 8 which are encoded as 0 and 1
1381 // Transform encoded shift amount 0 to 1 and 1 to 0.
1382 return CurDAG->getTargetConstant(!ShiftImm, MVT::i32);
// Operand accepting only LSLH-style (shift-zeros) immediates, run through
// the shift-flipping XFORM above.
1385 def neon_mov_imm_LSLH_transform_operand
1388 unsigned ShiftOnesIn;
1390 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1391 return (HasShift && !ShiftOnesIn); }],
1392 neon_mov_imm_LSLH_transform_XFORM>;
1394 // Transform (and A, (4h Neon_movi 0xff)) -> BIC 4h (A, 0xff, LSL 8)
1395 // Transform (and A, (4h Neon_movi 0xff LSL #8)) -> BIC 4h (A, 0xff)
1396 def : Pat<(v4i16 (and VPR64:$src,
1397 (v4i16 (Neon_movi 255,
1398 neon_mov_imm_LSLH_transform_operand:$Simm)))),
1399 (BICvi_lsl_4H VPR64:$src, 255,
1400 neon_mov_imm_LSLH_transform_operand:$Simm)>;
1402 // Transform (and A, (8h Neon_movi 8h 0xff)) -> BIC 8h (A, 0xff, LSL 8)
1403 // Transform (and A, (8h Neon_movi 0xff LSL #8)) -> BIC 8h (A, 0xff)
1404 def : Pat<(v8i16 (and VPR128:$src,
1405 (v8i16 (Neon_movi 255,
1406 neon_mov_imm_LSLH_transform_operand:$Simm)))),
1407 (BICvi_lsl_8H VPR128:$src, 255,
1408 neon_mov_imm_LSLH_transform_operand:$Simm)>;
// Same transforms with the AND input seen through a bitconvert, covering
// the other 64-bit vector types backed by a 4H movi ...
1410 def : Pat<(v8i8 (and VPR64:$src,
1411 (bitconvert(v4i16 (Neon_movi 255,
1412 neon_mov_imm_LSLH_transform_operand:$Simm))))),
1413 (BICvi_lsl_4H VPR64:$src, 255,
1414 neon_mov_imm_LSLH_transform_operand:$Simm)>;
1415 def : Pat<(v2i32 (and VPR64:$src,
1416 (bitconvert(v4i16 (Neon_movi 255,
1417 neon_mov_imm_LSLH_transform_operand:$Simm))))),
1418 (BICvi_lsl_4H VPR64:$src, 255,
1419 neon_mov_imm_LSLH_transform_operand:$Simm)>;
1420 def : Pat<(v1i64 (and VPR64:$src,
1421 (bitconvert(v4i16 (Neon_movi 255,
1422 neon_mov_imm_LSLH_transform_operand:$Simm))))),
1423 (BICvi_lsl_4H VPR64:$src, 255,
1424 neon_mov_imm_LSLH_transform_operand:$Simm)>;
// ... and the 128-bit vector types backed by an 8H movi.
1426 def : Pat<(v16i8 (and VPR128:$src,
1427 (bitconvert(v8i16 (Neon_movi 255,
1428 neon_mov_imm_LSLH_transform_operand:$Simm))))),
1429 (BICvi_lsl_8H VPR128:$src, 255,
1430 neon_mov_imm_LSLH_transform_operand:$Simm)>;
1431 def : Pat<(v4i32 (and VPR128:$src,
1432 (bitconvert(v8i16 (Neon_movi 255,
1433 neon_mov_imm_LSLH_transform_operand:$Simm))))),
1434 (BICvi_lsl_8H VPR128:$src, 255,
1435 neon_mov_imm_LSLH_transform_operand:$Simm)>;
1436 def : Pat<(v2i64 (and VPR128:$src,
1437 (bitconvert(v8i16 (Neon_movi 255,
1438 neon_mov_imm_LSLH_transform_operand:$Simm))))),
1439 (BICvi_lsl_8H VPR128:$src, 255,
1440 neon_mov_imm_LSLH_transform_operand:$Simm)>;
// Multiclass of selection patterns matching a bitwise op (`opnode`: and/or)
// between a vector register and a modified-immediate vector (`neonopnode`:
// Neon_mvni/Neon_movi) seen through a bitconvert, onto the 4H/8H (halfword
// immediate) or 2S/4S (word immediate) instruction variants supplied as
// INST4H/INST8H/INST2S/INST4S.
1442 multiclass Neon_bitwiseVi_patterns<SDPatternOperator opnode,
1443 SDPatternOperator neonopnode,
1447 Instruction INST4S> {
// 64-bit types viewed over a v4i16 immediate -> 4H instruction.
1448 def : Pat<(v8i8 (opnode VPR64:$src,
1449 (bitconvert(v4i16 (neonopnode timm:$Imm,
1450 neon_mov_imm_LSLH_operand:$Simm))))),
1451 (INST4H VPR64:$src, neon_uimm8:$Imm,
1452 neon_mov_imm_LSLH_operand:$Simm)>;
1453 def : Pat<(v2i32 (opnode VPR64:$src,
1454 (bitconvert(v4i16 (neonopnode timm:$Imm,
1455 neon_mov_imm_LSLH_operand:$Simm))))),
1456 (INST4H VPR64:$src, neon_uimm8:$Imm,
1457 neon_mov_imm_LSLH_operand:$Simm)>;
1458 def : Pat<(v1i64 (opnode VPR64:$src,
1459 (bitconvert(v4i16 (neonopnode timm:$Imm,
1460 neon_mov_imm_LSLH_operand:$Simm))))),
1461 (INST4H VPR64:$src, neon_uimm8:$Imm,
1462 neon_mov_imm_LSLH_operand:$Simm)>;
// 128-bit types viewed over a v8i16 immediate -> 8H instruction.
1464 def : Pat<(v16i8 (opnode VPR128:$src,
1465 (bitconvert(v8i16 (neonopnode timm:$Imm,
1466 neon_mov_imm_LSLH_operand:$Simm))))),
1467 (INST8H VPR128:$src, neon_uimm8:$Imm,
1468 neon_mov_imm_LSLH_operand:$Simm)>;
1469 def : Pat<(v4i32 (opnode VPR128:$src,
1470 (bitconvert(v8i16 (neonopnode timm:$Imm,
1471 neon_mov_imm_LSLH_operand:$Simm))))),
1472 (INST8H VPR128:$src, neon_uimm8:$Imm,
1473 neon_mov_imm_LSLH_operand:$Simm)>;
1474 def : Pat<(v2i64 (opnode VPR128:$src,
1475 (bitconvert(v8i16 (neonopnode timm:$Imm,
1476 neon_mov_imm_LSLH_operand:$Simm))))),
1477 (INST8H VPR128:$src, neon_uimm8:$Imm,
1478 neon_mov_imm_LSLH_operand:$Simm)>;
// 64-bit types viewed over a v2i32 immediate -> 2S instruction.
1480 def : Pat<(v8i8 (opnode VPR64:$src,
1481 (bitconvert(v2i32 (neonopnode timm:$Imm,
1482 neon_mov_imm_LSLH_operand:$Simm))))),
1483 (INST2S VPR64:$src, neon_uimm8:$Imm,
1484 neon_mov_imm_LSLH_operand:$Simm)>;
1485 def : Pat<(v4i16 (opnode VPR64:$src,
1486 (bitconvert(v2i32 (neonopnode timm:$Imm,
1487 neon_mov_imm_LSLH_operand:$Simm))))),
1488 (INST2S VPR64:$src, neon_uimm8:$Imm,
1489 neon_mov_imm_LSLH_operand:$Simm)>;
1490 def : Pat<(v1i64 (opnode VPR64:$src,
1491 (bitconvert(v2i32 (neonopnode timm:$Imm,
1492 neon_mov_imm_LSLH_operand:$Simm))))),
1493 (INST2S VPR64:$src, neon_uimm8:$Imm,
1494 neon_mov_imm_LSLH_operand:$Simm)>;
// 128-bit types viewed over a v4i32 immediate -> 4S instruction.
1496 def : Pat<(v16i8 (opnode VPR128:$src,
1497 (bitconvert(v4i32 (neonopnode timm:$Imm,
1498 neon_mov_imm_LSLH_operand:$Simm))))),
1499 (INST4S VPR128:$src, neon_uimm8:$Imm,
1500 neon_mov_imm_LSLH_operand:$Simm)>;
1501 def : Pat<(v8i16 (opnode VPR128:$src,
1502 (bitconvert(v4i32 (neonopnode timm:$Imm,
1503 neon_mov_imm_LSLH_operand:$Simm))))),
1504 (INST4S VPR128:$src, neon_uimm8:$Imm,
1505 neon_mov_imm_LSLH_operand:$Simm)>;
1506 def : Pat<(v2i64 (opnode VPR128:$src,
1507 (bitconvert(v4i32 (neonopnode timm:$Imm,
1508 neon_mov_imm_LSLH_operand:$Simm))))),
1509 (INST4S VPR128:$src, neon_uimm8:$Imm,
1510 neon_mov_imm_LSLH_operand:$Simm)>;
1513 // Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immediate
1514 defm : Neon_bitwiseVi_patterns<and, Neon_mvni, BICvi_lsl_4H, BICvi_lsl_8H,
1515 BICvi_lsl_2S, BICvi_lsl_4S>;
1517 // Additional patterns for Vector Bitwise OR - immediate
1518 defm : Neon_bitwiseVi_patterns<or, Neon_movi, ORRvi_lsl_4H, ORRvi_lsl_8H,
1519 ORRvi_lsl_2S, ORRvi_lsl_4S>;
1522 // Vector Move Immediate Masked
1523 let isReMaterializable = 1 in {
1524 defm MOVIvi_msl : NeonI_mov_imm_msl_sizes<"movi", 0b0, Neon_movi>;
1527 // Vector Move Inverted Immediate Masked
1528 let isReMaterializable = 1 in {
1529 defm MVNIvi_msl : NeonI_mov_imm_msl_sizes<"mvni", 0b1, Neon_mvni>;
// Asm alias: allow the shifted-immediate instructions to be written
// without an explicit shift operand (shift defaults to 0).
1532 class NeonI_mov_imm_lsl_aliases<string asmop, string asmlane,
1533 Instruction inst, RegisterOperand VPRC>
1534 : NeonInstAlias<!strconcat(asmop, "\t$Rd," # asmlane # ", $Imm"),
1535 (inst VPRC:$Rd, neon_uimm8:$Imm, 0), 0b0>;
1537 // Aliases for Vector Move Immediate Shifted
1538 def : NeonI_mov_imm_lsl_aliases<"movi", ".2s", MOVIvi_lsl_2S, VPR64>;
1539 def : NeonI_mov_imm_lsl_aliases<"movi", ".4s", MOVIvi_lsl_4S, VPR128>;
1540 def : NeonI_mov_imm_lsl_aliases<"movi", ".4h", MOVIvi_lsl_4H, VPR64>;
1541 def : NeonI_mov_imm_lsl_aliases<"movi", ".8h", MOVIvi_lsl_8H, VPR128>;
1543 // Aliases for Vector Move Inverted Immediate Shifted
1544 def : NeonI_mov_imm_lsl_aliases<"mvni", ".2s", MVNIvi_lsl_2S, VPR64>;
1545 def : NeonI_mov_imm_lsl_aliases<"mvni", ".4s", MVNIvi_lsl_4S, VPR128>;
1546 def : NeonI_mov_imm_lsl_aliases<"mvni", ".4h", MVNIvi_lsl_4H, VPR64>;
1547 def : NeonI_mov_imm_lsl_aliases<"mvni", ".8h", MVNIvi_lsl_8H, VPR128>;
1549 // Aliases for Vector Bitwise Bit Clear (AND NOT) - immediate
1550 def : NeonI_mov_imm_lsl_aliases<"bic", ".2s", BICvi_lsl_2S, VPR64>;
1551 def : NeonI_mov_imm_lsl_aliases<"bic", ".4s", BICvi_lsl_4S, VPR128>;
1552 def : NeonI_mov_imm_lsl_aliases<"bic", ".4h", BICvi_lsl_4H, VPR64>;
1553 def : NeonI_mov_imm_lsl_aliases<"bic", ".8h", BICvi_lsl_8H, VPR128>;
1555 // Aliases for Vector Bitwise OR - immediate
1556 def : NeonI_mov_imm_lsl_aliases<"orr", ".2s", ORRvi_lsl_2S, VPR64>;
1557 def : NeonI_mov_imm_lsl_aliases<"orr", ".4s", ORRvi_lsl_4S, VPR128>;
1558 def : NeonI_mov_imm_lsl_aliases<"orr", ".4h", ORRvi_lsl_4H, VPR64>;
1559 def : NeonI_mov_imm_lsl_aliases<"orr", ".8h", ORRvi_lsl_8H, VPR128>;
1561 // Vector Move Immediate - per byte
1562 let isReMaterializable = 1 in {
1563 def MOVIvi_8B : NeonI_1VModImm<0b0, 0b0,
1564 (outs VPR64:$Rd), (ins neon_uimm8:$Imm),
1565 "movi\t$Rd.8b, $Imm",
1566 [(set (v8i8 VPR64:$Rd),
1567 (v8i8 (Neon_movi (timm:$Imm), (i32 imm))))],
1572 def MOVIvi_16B : NeonI_1VModImm<0b1, 0b0,
1573 (outs VPR128:$Rd), (ins neon_uimm8:$Imm),
1574 "movi\t$Rd.16b, $Imm",
1575 [(set (v16i8 VPR128:$Rd),
1576 (v16i8 (Neon_movi (timm:$Imm), (i32 imm))))],
1582 // Vector Move Immediate - bytemask, per double word
1583 let isReMaterializable = 1 in {
1584 def MOVIvi_2D : NeonI_1VModImm<0b1, 0b1,
1585 (outs VPR128:$Rd), (ins neon_uimm64_mask:$Imm),
// NOTE(review): the asm string below has a space after "\t" ("movi\t $Rd"),
// unlike the 8B/16B forms above - verify this is intended.
1586 "movi\t $Rd.2d, $Imm",
1587 [(set (v2i64 VPR128:$Rd),
1588 (v2i64 (Neon_movi (timm:$Imm), (i32 imm))))],
1594 // Vector Move Immediate - bytemask, one doubleword
// Scalar form targeting FPR64 rather than a vector register operand.
1596 let isReMaterializable = 1 in {
1597 def MOVIdi : NeonI_1VModImm<0b0, 0b1,
1598 (outs FPR64:$Rd), (ins neon_uimm64_mask:$Imm),
1600 [(set (v1i64 FPR64:$Rd),
1601 (v1i64 (Neon_movi (timm:$Imm), (i32 imm))))],
1607 // Vector Floating Point Move Immediate
// Shared implementation class for FMOV (vector, immediate); q/op select
// the arrangement and single/double variant.
1609 class NeonI_FMOV_impl<string asmlane, RegisterOperand VPRC, ValueType OpTy,
1610 Operand immOpType, bit q, bit op>
1611 : NeonI_1VModImm<q, op,
1612 (outs VPRC:$Rd), (ins immOpType:$Imm),
1613 "fmov\t$Rd" # asmlane # ", $Imm",
1614 [(set (OpTy VPRC:$Rd),
1615 (OpTy (Neon_fmovi (timm:$Imm))))],
1620 let isReMaterializable = 1 in {
1621 def FMOVvi_2S : NeonI_FMOV_impl<".2s", VPR64, v2f32, fmov32_operand, 0b0, 0b0>;
1622 def FMOVvi_4S : NeonI_FMOV_impl<".4s", VPR128, v4f32, fmov32_operand, 0b1, 0b0>;
1623 def FMOVvi_2D : NeonI_FMOV_impl<".2d", VPR128, v2f64, fmov64_operand, 0b1, 0b1>;
1626 // Vector Shift (Immediate)
1628 // Shift Right/Left Immediate - The immh:immb field of these shifts are encoded
1632 // 8 immh:immb<6:3> = '0001xxx', <imm> is encoded in immh:immb<2:0>
1633 // 16 immh:immb<6:4> = '001xxxx', <imm> is encoded in immh:immb<3:0>
1634 // 32 immh:immb<6:5> = '01xxxxx', <imm> is encoded in immh:immb<4:0>
1635 // 64 immh:immb<6> = '1xxxxxx', <imm> is encoded in immh:immb<5:0>
1637 // The shift right immediate amount, in the range 1 to element bits, is computed
1638 // as Offset - UInt(immh:immb). The shift left immediate amount, in the range 0
1639 // to element bits - 1, is computed as UInt(immh:immb) - Offset.
1641 class shr_imm_asmoperands<string OFFSET> : AsmOperandClass {
1642 let Name = "ShrImm" # OFFSET;
1643 let RenderMethod = "addImmOperands";
1644 let DiagnosticType = "ShrImm" # OFFSET;
1647 class shr_imm<string OFFSET> : Operand<i32> {
1648 let EncoderMethod = "getShiftRightImm" # OFFSET;
1649 let DecoderMethod = "DecodeShiftRightImm" # OFFSET;
1650 let ParserMatchClass =
1651 !cast<AsmOperandClass>("shr_imm" # OFFSET # "_asmoperand");
1654 def shr_imm8_asmoperand : shr_imm_asmoperands<"8">;
1655 def shr_imm16_asmoperand : shr_imm_asmoperands<"16">;
1656 def shr_imm32_asmoperand : shr_imm_asmoperands<"32">;
1657 def shr_imm64_asmoperand : shr_imm_asmoperands<"64">;
1659 def shr_imm8 : shr_imm<"8">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 8;}]>;
1660 def shr_imm16 : shr_imm<"16">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 16;}]>;
1661 def shr_imm32 : shr_imm<"32">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 32;}]>;
1662 def shr_imm64 : shr_imm<"64">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 64;}]>;
// Assembler operand class for a left-shift immediate of an OFFSET-bit
// element; mirrors shr_imm_asmoperands above.
1664 class shl_imm_asmoperands<string OFFSET> : AsmOperandClass {
1665 let Name = "ShlImm" # OFFSET;
1666 let RenderMethod = "addImmOperands";
1667 let DiagnosticType = "ShlImm" # OFFSET;
// i32 operand wrapper for left shifts; hooks selected by OFFSET.
1670 class shl_imm<string OFFSET> : Operand<i32> {
1671 let EncoderMethod = "getShiftLeftImm" # OFFSET;
1672 let DecoderMethod = "DecodeShiftLeftImm" # OFFSET;
1673 let ParserMatchClass =
1674 !cast<AsmOperandClass>("shl_imm" # OFFSET # "_asmoperand");
1677 def shl_imm8_asmoperand : shl_imm_asmoperands<"8">;
1678 def shl_imm16_asmoperand : shl_imm_asmoperands<"16">;
1679 def shl_imm32_asmoperand : shl_imm_asmoperands<"32">;
1680 def shl_imm64_asmoperand : shl_imm_asmoperands<"64">;
// A shift-left amount is in the range [0, element bits - 1], matching the
// "UInt(immh:immb) - Offset" encoding described earlier in this file.
1682 def shl_imm8 : shl_imm<"8">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 8;}]>;
1683 def shl_imm16 : shl_imm<"16">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 16;}]>;
1684 def shl_imm32 : shl_imm<"32">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 32;}]>;
1685 def shl_imm64 : shl_imm<"64">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 64;}]>;
// Base class for a two-register vector shift by immediate.  The pattern
// matches OpNode (shl/sra/srl) of $Rn against a splat (Neon_vdup) of the
// immediate, since IR shifts take a full vector shift-amount operand.
1687 class N2VShift<bit q, bit u, bits<5> opcode, string asmop, string T,
1688 RegisterOperand VPRC, ValueType Ty, Operand ImmTy, SDNode OpNode>
1689 : NeonI_2VShiftImm<q, u, opcode,
1690 (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
1691 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1692 [(set (Ty VPRC:$Rd),
1693 (Ty (OpNode (Ty VPRC:$Rn),
1694 (Ty (Neon_vdup (i32 ImmTy:$Imm))))))],
// Shift-left by immediate for every element arrangement.  Each instantiation
// pins the immh prefix bits (Inst{22-19}) that select the element size, per
// the immh:immb encoding table earlier in this file.
1697 multiclass NeonI_N2VShL<bit u, bits<5> opcode, string asmop> {
1698 // 64-bit vector types.
1699 def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shl_imm8, shl> {
1700 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
1703 def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shl_imm16, shl> {
1704 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
1707 def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shl_imm32, shl> {
1708 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
1711 // 128-bit vector types.
1712 def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shl_imm8, shl> {
1713 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
1716 def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shl_imm16, shl> {
1717 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
1720 def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shl_imm32, shl> {
1721 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
1724 def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shl_imm64, shl> {
1725 let Inst{22} = 0b1; // immh:immb = 1xxxxxx
// Shift-right by immediate (sra or srl, chosen by OpNode) for every element
// arrangement; same immh element-size prefixes as NeonI_N2VShL.
1729 multiclass NeonI_N2VShR<bit u, bits<5> opcode, string asmop, SDNode OpNode> {
1730 def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1732 let Inst{22-19} = 0b0001;
1735 def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1737 let Inst{22-20} = 0b001;
1740 def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1742 let Inst{22-21} = 0b01;
1745 def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1747 let Inst{22-19} = 0b0001;
1750 def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1752 let Inst{22-20} = 0b001;
1755 def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1757 let Inst{22-21} = 0b01;
1760 def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
// SHL (vector, immediate).
1768 defm SHLvvi : NeonI_N2VShL<0b0, 0b01010, "shl">;
1770 // Additional patterns to match vector shift left by immediate.
1771 // (v1i8/v1i16/v1i32 types)
// Scalar-in-vector (v1iN) shifts have no dedicated instruction; widen the
// FPR source into the low lane of a 64-bit register and use the vector SHL.
1772 def : Pat<(v1i8 (shl (v1i8 FPR8:$Rn),
1773 (v1i8 (Neon_vdup (i32 (shl_imm8:$Imm)))))),
1775 (SHLvvi_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8),
1778 def : Pat<(v1i16 (shl (v1i16 FPR16:$Rn),
1779 (v1i16 (Neon_vdup (i32 (shl_imm16:$Imm)))))),
1781 (SHLvvi_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16),
1784 def : Pat<(v1i32 (shl (v1i32 FPR32:$Rn),
1785 (v1i32 (Neon_vdup (i32 (shl_imm32:$Imm)))))),
1787 (SHLvvi_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
// SSHR/USHR (vector, immediate): arithmetic and logical right shift.
1792 defm SSHRvvi : NeonI_N2VShR<0b0, 0b00000, "sshr", sra>;
1793 defm USHRvvi : NeonI_N2VShR<0b1, 0b00000, "ushr", srl>;
1795 // Additional patterns to match vector shift right by immediate.
1796 // (v1i8/v1i16/v1i32 types)
// As with SHL above: widen the scalar FPR into the low lane and use the
// 64-bit vector form of SSHR/USHR.
1797 def : Pat<(v1i8 (sra (v1i8 FPR8:$Rn),
1798 (v1i8 (Neon_vdup (i32 (shr_imm8:$Imm)))))),
1800 (SSHRvvi_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8),
1803 def : Pat<(v1i16 (sra (v1i16 FPR16:$Rn),
1804 (v1i16 (Neon_vdup (i32 (shr_imm16:$Imm)))))),
1806 (SSHRvvi_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16),
1809 def : Pat<(v1i32 (sra (v1i32 FPR32:$Rn),
1810 (v1i32 (Neon_vdup (i32 (shr_imm32:$Imm)))))),
1812 (SSHRvvi_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
1815 def : Pat<(v1i8 (srl (v1i8 FPR8:$Rn),
1816 (v1i8 (Neon_vdup (i32 (shr_imm8:$Imm)))))),
1818 (USHRvvi_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8),
1821 def : Pat<(v1i16 (srl (v1i16 FPR16:$Rn),
1822 (v1i16 (Neon_vdup (i32 (shr_imm16:$Imm)))))),
1824 (USHRvvi_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16),
1827 def : Pat<(v1i32 (srl (v1i32 FPR32:$Rn),
1828 (v1i32 (Neon_vdup (i32 (shr_imm32:$Imm)))))),
1830 (USHRvvi_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
// PatFrags selecting the high half of a 128-bit vector: an extract_subvector
// starting at the midpoint index (8/4/2/1 elements respectively).
1834 def Neon_High16B : PatFrag<(ops node:$in),
1835 (extract_subvector (v16i8 node:$in), (iPTR 8))>;
1836 def Neon_High8H : PatFrag<(ops node:$in),
1837 (extract_subvector (v8i16 node:$in), (iPTR 4))>;
1838 def Neon_High4S : PatFrag<(ops node:$in),
1839 (extract_subvector (v4i32 node:$in), (iPTR 2))>;
1840 def Neon_High2D : PatFrag<(ops node:$in),
1841 (extract_subvector (v2i64 node:$in), (iPTR 1))>;
1842 def Neon_High4float : PatFrag<(ops node:$in),
1843 (extract_subvector (v4f32 node:$in), (iPTR 2))>;
1844 def Neon_High2double : PatFrag<(ops node:$in),
1845 (extract_subvector (v2f64 node:$in), (iPTR 1))>;
// PatFrags selecting the low half of a 128-bit vector (extract at index 0).
1847 def Neon_Low16B : PatFrag<(ops node:$in),
1848 (v8i8 (extract_subvector (v16i8 node:$in),
1850 def Neon_Low8H : PatFrag<(ops node:$in),
1851 (v4i16 (extract_subvector (v8i16 node:$in),
1853 def Neon_Low4S : PatFrag<(ops node:$in),
1854 (v2i32 (extract_subvector (v4i32 node:$in),
1856 def Neon_Low2D : PatFrag<(ops node:$in),
1857 (v1i64 (extract_subvector (v2i64 node:$in),
1859 def Neon_Low4float : PatFrag<(ops node:$in),
1860 (v2f32 (extract_subvector (v4f32 node:$in),
1862 def Neon_Low2double : PatFrag<(ops node:$in),
1863 (v1f64 (extract_subvector (v2f64 node:$in),
// Widening shift-left-long: extend (ExtOp) each SrcTy element of the 64-bit
// source to the wider DestTy, then shift by the splatted immediate.
1866 class N2VShiftLong<bit q, bit u, bits<5> opcode, string asmop, string DestT,
1867 string SrcT, ValueType DestTy, ValueType SrcTy,
1868 Operand ImmTy, SDPatternOperator ExtOp>
1869 : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
1870 (ins VPR64:$Rn, ImmTy:$Imm),
1871 asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
1872 [(set (DestTy VPR128:$Rd),
1874 (DestTy (ExtOp (SrcTy VPR64:$Rn))),
1875 (DestTy (Neon_vdup (i32 ImmTy:$Imm))))))],
// "2" (second-part) variant of the widening shift: operates on the high half
// of a 128-bit source, selected by the getTop PatFrag (Neon_High*).
1878 class N2VShiftLongHigh<bit q, bit u, bits<5> opcode, string asmop, string DestT,
1879 string SrcT, ValueType DestTy, ValueType SrcTy,
1880 int StartIndex, Operand ImmTy,
1881 SDPatternOperator ExtOp, PatFrag getTop>
1882 : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
1883 (ins VPR128:$Rn, ImmTy:$Imm),
1884 asmop # "2\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
1885 [(set (DestTy VPR128:$Rd),
1888 (SrcTy (getTop VPR128:$Rn)))),
1889 (DestTy (Neon_vdup (i32 ImmTy:$Imm))))))],
// SSHLL/USHLL family: widening shift-left-long plus the SHLL2 high-half
// forms, and extra patterns for plain extensions (shift amount 0).
1892 multiclass NeonI_N2VShLL<string prefix, bit u, bits<5> opcode, string asmop,
1894 // 64-bit vector types.
1895 def _8B : N2VShiftLong<0b0, u, opcode, asmop, "8h", "8b", v8i16, v8i8,
1897 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
1900 def _4H : N2VShiftLong<0b0, u, opcode, asmop, "4s", "4h", v4i32, v4i16,
1902 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
1905 def _2S : N2VShiftLong<0b0, u, opcode, asmop, "2d", "2s", v2i64, v2i32,
1907 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
1910 // 128-bit vector types
1911 def _16B : N2VShiftLongHigh<0b1, u, opcode, asmop, "8h", "16b", v8i16, v8i8,
1912 8, shl_imm8, ExtOp, Neon_High16B> {
1913 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
1916 def _8H : N2VShiftLongHigh<0b1, u, opcode, asmop, "4s", "8h", v4i32, v4i16,
1917 4, shl_imm16, ExtOp, Neon_High8H> {
1918 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
1921 def _4S : N2VShiftLongHigh<0b1, u, opcode, asmop, "2d", "4s", v2i64, v2i32,
1922 2, shl_imm32, ExtOp, Neon_High4S> {
1923 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
1926 // Use other patterns to match when the immediate is 0.
// A bare sext/zext is just the SHLL instruction with a #0 shift.
1927 def : Pat<(v8i16 (ExtOp (v8i8 VPR64:$Rn))),
1928 (!cast<Instruction>(prefix # "_8B") VPR64:$Rn, 0)>;
1930 def : Pat<(v4i32 (ExtOp (v4i16 VPR64:$Rn))),
1931 (!cast<Instruction>(prefix # "_4H") VPR64:$Rn, 0)>;
1933 def : Pat<(v2i64 (ExtOp (v2i32 VPR64:$Rn))),
1934 (!cast<Instruction>(prefix # "_2S") VPR64:$Rn, 0)>;
1936 def : Pat<(v8i16 (ExtOp (v8i8 (Neon_High16B VPR128:$Rn)))),
1937 (!cast<Instruction>(prefix # "_16B") VPR128:$Rn, 0)>;
1939 def : Pat<(v4i32 (ExtOp (v4i16 (Neon_High8H VPR128:$Rn)))),
1940 (!cast<Instruction>(prefix # "_8H") VPR128:$Rn, 0)>;
1942 def : Pat<(v2i64 (ExtOp (v2i32 (Neon_High4S VPR128:$Rn)))),
1943 (!cast<Instruction>(prefix # "_4S") VPR128:$Rn, 0)>;
// SSHLL (signed) and USHLL (unsigned) widening shifts.
1947 defm SSHLLvvi : NeonI_N2VShLL<"SSHLLvvi", 0b0, 0b10100, "sshll", sext>;
1948 defm USHLLvvi : NeonI_N2VShLL<"USHLLvvi", 0b1, 0b10100, "ushll", zext>;
// Alias helper: prints/parses "<asmop> Vd.<lane>, Vn.<laneOp>" as the
// corresponding SHLL instruction with an implicit #0 shift amount.
1950 class NeonI_ext_len_alias<string asmop, string lane, string laneOp,
1951 Instruction inst, RegisterOperand VPRC,
1952 RegisterOperand VPRCOp>
1953 : NeonInstAlias<asmop # "\t$Rd" # lane #", $Rn" # laneOp,
1954 (inst VPRC:$Rd, VPRCOp:$Rn, 0), 0b0>;
1956 // Signed integer lengthen (vector) is alias for SSHLL Vd, Vn, #0
1957 // Signed integer lengthen (vector, second part) is alias for SSHLL2 Vd, Vn, #0
1958 // FIXME: This is actually the preferred syntax but TableGen can't deal with
1959 // custom printing of aliases.
1960 def SXTLvv_8B : NeonI_ext_len_alias<"sxtl", ".8h", ".8b", SSHLLvvi_8B, VPR128, VPR64>;
1961 def SXTLvv_4H : NeonI_ext_len_alias<"sxtl", ".4s", ".4h", SSHLLvvi_4H, VPR128, VPR64>;
1962 def SXTLvv_2S : NeonI_ext_len_alias<"sxtl", ".2d", ".2s", SSHLLvvi_2S, VPR128, VPR64>;
1963 def SXTL2vv_16B : NeonI_ext_len_alias<"sxtl2", ".8h", ".16b", SSHLLvvi_16B, VPR128, VPR128>;
1964 def SXTL2vv_8H : NeonI_ext_len_alias<"sxtl2", ".4s", ".8h", SSHLLvvi_8H, VPR128, VPR128>;
1965 def SXTL2vv_4S : NeonI_ext_len_alias<"sxtl2", ".2d", ".4s", SSHLLvvi_4S, VPR128, VPR128>;
1967 // Unsigned integer lengthen (vector) is alias for USHLL Vd, Vn, #0
1968 // Unsigned integer lengthen (vector, second part) is alias for USHLL2 Vd, Vn, #0
1969 // FIXME: This is actually the preferred syntax but TableGen can't deal with
1970 // custom printing of aliases.
1971 def UXTLvv_8B : NeonI_ext_len_alias<"uxtl", ".8h", ".8b", USHLLvvi_8B, VPR128, VPR64>;
1972 def UXTLvv_4H : NeonI_ext_len_alias<"uxtl", ".4s", ".4h", USHLLvvi_4H, VPR128, VPR64>;
1973 def UXTLvv_2S : NeonI_ext_len_alias<"uxtl", ".2d", ".2s", USHLLvvi_2S, VPR128, VPR64>;
1974 def UXTL2vv_16B : NeonI_ext_len_alias<"uxtl2", ".8h", ".16b", USHLLvvi_16B, VPR128, VPR128>;
1975 def UXTL2vv_8H : NeonI_ext_len_alias<"uxtl2", ".4s", ".8h", USHLLvvi_8H, VPR128, VPR128>;
1976 def UXTL2vv_4S : NeonI_ext_len_alias<"uxtl2", ".2d", ".4s", USHLLvvi_4S, VPR128, VPR128>;
// anyext can be implemented with either extension; use the unsigned form.
1978 def : Pat<(v8i16 (anyext (v8i8 VPR64:$Rn))), (USHLLvvi_8B VPR64:$Rn, 0)>;
1979 def : Pat<(v4i32 (anyext (v4i16 VPR64:$Rn))), (USHLLvvi_4H VPR64:$Rn, 0)>;
1980 def : Pat<(v2i64 (anyext (v2i32 VPR64:$Rn))), (USHLLvvi_2S VPR64:$Rn, 0)>;
1982 // Rounding/Saturating shift
// Base class for rounding/saturating shifts by immediate.  Unlike N2VShift,
// OpNode here is an intrinsic that takes the immediate directly as an i32
// operand rather than as a splatted vector.
1983 class N2VShift_RQ<bit q, bit u, bits<5> opcode, string asmop, string T,
1984 RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1985 SDPatternOperator OpNode>
1986 : NeonI_2VShiftImm<q, u, opcode,
1987 (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
1988 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1989 [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$Rn),
1990 (i32 ImmTy:$Imm))))],
1993 // shift right (vector by immediate)
// Rounding/saturating shift right, all element arrangements.
1994 multiclass NeonI_N2VShR_RQ<bit u, bits<5> opcode, string asmop,
1995 SDPatternOperator OpNode> {
1996 def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1998 let Inst{22-19} = 0b0001;
2001 def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
2003 let Inst{22-20} = 0b001;
2006 def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
2008 let Inst{22-21} = 0b01;
2011 def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
2013 let Inst{22-19} = 0b0001;
2016 def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
2018 let Inst{22-20} = 0b001;
2021 def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
2023 let Inst{22-21} = 0b01;
2026 def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
// Saturating shift left by immediate, all element arrangements.
2032 multiclass NeonI_N2VShL_Q<bit u, bits<5> opcode, string asmop,
2033 SDPatternOperator OpNode> {
2034 // 64-bit vector types.
2035 def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shl_imm8,
2037 let Inst{22-19} = 0b0001;
2040 def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shl_imm16,
2042 let Inst{22-20} = 0b001;
2045 def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shl_imm32,
2047 let Inst{22-21} = 0b01;
2050 // 128-bit vector types.
2051 def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shl_imm8,
2053 let Inst{22-19} = 0b0001;
2056 def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shl_imm16,
2058 let Inst{22-20} = 0b001;
2061 def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shl_imm32,
2063 let Inst{22-21} = 0b01;
2066 def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shl_imm64,
2072 // Rounding shift right
2073 defm SRSHRvvi : NeonI_N2VShR_RQ<0b0, 0b00100, "srshr",
2074 int_aarch64_neon_vsrshr>;
2075 defm URSHRvvi : NeonI_N2VShR_RQ<0b1, 0b00100, "urshr",
2076 int_aarch64_neon_vurshr>;
2078 // Saturating shift left unsigned
2079 defm SQSHLUvvi : NeonI_N2VShL_Q<0b1, 0b01100, "sqshlu", int_aarch64_neon_vsqshlu>;
2081 // Saturating shift left
// SQSHL/UQSHL select via the NEON_QSHLs/NEON_QSHLu target nodes.
2082 defm SQSHLvvi : NeonI_N2VShL_Q<0b0, 0b01110, "sqshl", Neon_sqrshlImm>;
2083 defm UQSHLvvi : NeonI_N2VShL_Q<0b1, 0b01110, "uqshl", Neon_uqrshlImm>;
// Shift-right-and-accumulate base class: $Rd = $src + (OpNode $Rn, splat(Imm)).
// The accumulator is tied to the destination via the $src = $Rd constraint.
2085 class N2VShiftAdd<bit q, bit u, bits<5> opcode, string asmop, string T,
2086 RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
2088 : NeonI_2VShiftImm<q, u, opcode,
2089 (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
2090 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
2091 [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
2092 (Ty (OpNode (Ty VPRC:$Rn),
2093 (Ty (Neon_vdup (i32 ImmTy:$Imm))))))))],
2095 let Constraints = "$src = $Rd";
2098 // Shift Right accumulate
// SSRA/USRA: shift right (sra/srl) and accumulate, all arrangements.
2099 multiclass NeonI_N2VShRAdd<bit u, bits<5> opcode, string asmop, SDNode OpNode> {
2100 def _8B : N2VShiftAdd<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
2102 let Inst{22-19} = 0b0001;
2105 def _4H : N2VShiftAdd<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
2107 let Inst{22-20} = 0b001;
2110 def _2S : N2VShiftAdd<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
2112 let Inst{22-21} = 0b01;
2115 def _16B : N2VShiftAdd<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
2117 let Inst{22-19} = 0b0001;
2120 def _8H : N2VShiftAdd<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
2122 let Inst{22-20} = 0b001;
2125 def _4S : N2VShiftAdd<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
2127 let Inst{22-21} = 0b01;
2130 def _2D : N2VShiftAdd<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
2136 // Shift right and accumulate
2137 defm SSRAvvi : NeonI_N2VShRAdd<0, 0b00010, "ssra", sra>;
2138 defm USRAvvi : NeonI_N2VShRAdd<1, 0b00010, "usra", srl>;
2140 // Rounding shift accumulate
// Rounding variant of shift-and-accumulate: OpNode is an intrinsic taking
// the immediate as a plain i32; accumulator tied via $src = $Rd.
2141 class N2VShiftAdd_R<bit q, bit u, bits<5> opcode, string asmop, string T,
2142 RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
2143 SDPatternOperator OpNode>
2144 : NeonI_2VShiftImm<q, u, opcode,
2145 (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
2146 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
2147 [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
2148 (Ty (OpNode (Ty VPRC:$Rn), (i32 ImmTy:$Imm))))))],
2150 let Constraints = "$src = $Rd";
// SRSRA/URSRA: rounding shift right and accumulate, all arrangements.
2153 multiclass NeonI_N2VShRAdd_R<bit u, bits<5> opcode, string asmop,
2154 SDPatternOperator OpNode> {
2155 def _8B : N2VShiftAdd_R<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
2157 let Inst{22-19} = 0b0001;
2160 def _4H : N2VShiftAdd_R<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
2162 let Inst{22-20} = 0b001;
2165 def _2S : N2VShiftAdd_R<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
2167 let Inst{22-21} = 0b01;
2170 def _16B : N2VShiftAdd_R<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
2172 let Inst{22-19} = 0b0001;
2175 def _8H : N2VShiftAdd_R<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
2177 let Inst{22-20} = 0b001;
2180 def _4S : N2VShiftAdd_R<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
2182 let Inst{22-21} = 0b01;
2185 def _2D : N2VShiftAdd_R<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
2191 // Rounding shift right and accumulate
2192 defm SRSRAvvi : NeonI_N2VShRAdd_R<0, 0b00110, "srsra", int_aarch64_neon_vsrshr>;
2193 defm URSRAvvi : NeonI_N2VShRAdd_R<1, 0b00110, "ursra", int_aarch64_neon_vurshr>;
2195 // Shift insert by immediate
// Shift-and-insert base class (SLI/SRI): the intrinsic receives the old
// destination value, the source, and the immediate; destination is tied.
2196 class N2VShiftIns<bit q, bit u, bits<5> opcode, string asmop, string T,
2197 RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
2198 SDPatternOperator OpNode>
2199 : NeonI_2VShiftImm<q, u, opcode,
2200 (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
2201 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
2202 [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$src), (Ty VPRC:$Rn),
2203 (i32 ImmTy:$Imm))))],
2205 let Constraints = "$src = $Rd";
2208 // shift left insert (vector by immediate)
// SLI: shift left and insert (int_aarch64_neon_vsli), all arrangements.
2209 multiclass NeonI_N2VShLIns<bit u, bits<5> opcode, string asmop> {
2210 def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shl_imm8,
2211 int_aarch64_neon_vsli> {
2212 let Inst{22-19} = 0b0001;
2215 def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shl_imm16,
2216 int_aarch64_neon_vsli> {
2217 let Inst{22-20} = 0b001;
2220 def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shl_imm32,
2221 int_aarch64_neon_vsli> {
2222 let Inst{22-21} = 0b01;
2225 // 128-bit vector types
2226 def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shl_imm8,
2227 int_aarch64_neon_vsli> {
2228 let Inst{22-19} = 0b0001;
2231 def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shl_imm16,
2232 int_aarch64_neon_vsli> {
2233 let Inst{22-20} = 0b001;
2236 def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shl_imm32,
2237 int_aarch64_neon_vsli> {
2238 let Inst{22-21} = 0b01;
2241 def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shl_imm64,
2242 int_aarch64_neon_vsli> {
2247 // shift right insert (vector by immediate)
// SRI: shift right and insert (int_aarch64_neon_vsri), all arrangements.
2248 multiclass NeonI_N2VShRIns<bit u, bits<5> opcode, string asmop> {
2249 // 64-bit vector types.
2250 def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
2251 int_aarch64_neon_vsri> {
2252 let Inst{22-19} = 0b0001;
2255 def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
2256 int_aarch64_neon_vsri> {
2257 let Inst{22-20} = 0b001;
2260 def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
2261 int_aarch64_neon_vsri> {
2262 let Inst{22-21} = 0b01;
2265 // 128-bit vector types
2266 def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
2267 int_aarch64_neon_vsri> {
2268 let Inst{22-19} = 0b0001;
2271 def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
2272 int_aarch64_neon_vsri> {
2273 let Inst{22-20} = 0b001;
2276 def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
2277 int_aarch64_neon_vsri> {
2278 let Inst{22-21} = 0b01;
2281 def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
2282 int_aarch64_neon_vsri> {
2287 // Shift left and insert
2288 defm SLIvvi : NeonI_N2VShLIns<0b1, 0b01010, "sli">;
2290 // Shift right and insert
2291 defm SRIvvi : NeonI_N2VShRIns<0b1, 0b01000, "sri">;
// Narrowing shift right: 128-bit source, 64-bit (half-width) result.
// No ISel pattern here; selection is done via the PatFrag patterns below.
2293 class N2VShR_Narrow<bit q, bit u, bits<5> opcode, string asmop, string DestT,
2294 string SrcT, Operand ImmTy>
2295 : NeonI_2VShiftImm<q, u, opcode,
2296 (outs VPR64:$Rd), (ins VPR128:$Rn, ImmTy:$Imm),
2297 asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
// High ("2") form: writes the narrowed result into the top half of $Rd,
// so the previous value is passed in as a tied $src operand.
2300 class N2VShR_Narrow_Hi<bit q, bit u, bits<5> opcode, string asmop, string DestT,
2301 string SrcT, Operand ImmTy>
2302 : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
2303 (ins VPR128:$src, VPR128:$Rn, ImmTy:$Imm),
2304 asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
2306 let Constraints = "$src = $Rd";
2309 // Shift right narrow (vector by immediate)
// Narrowing shift right for each element arrangement, plus the "2"
// (high-half) forms which get the asmop # "2" mnemonic.
2310 multiclass NeonI_N2VShR_Narrow<bit u, bits<5> opcode, string asmop> {
2311 def _8B : N2VShR_Narrow<0b0, u, opcode, asmop, "8b", "8h", shr_imm8> {
2312 let Inst{22-19} = 0b0001;
2315 def _4H : N2VShR_Narrow<0b0, u, opcode, asmop, "4h", "4s", shr_imm16> {
2316 let Inst{22-20} = 0b001;
2319 def _2S : N2VShR_Narrow<0b0, u, opcode, asmop, "2s", "2d", shr_imm32> {
2320 let Inst{22-21} = 0b01;
2323 // Shift Narrow High
2324 def _16B : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "16b", "8h",
2326 let Inst{22-19} = 0b0001;
2329 def _8H : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "8h", "4s",
2331 let Inst{22-20} = 0b001;
2334 def _4S : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "4s", "2d",
2336 let Inst{22-21} = 0b01;
2340 // Shift right narrow
// SHRN plus the saturating (Q) and rounding (R) narrowing shifts.
2341 defm SHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10000, "shrn">;
2343 // Shift right narrow (prefix Q is saturating, prefix R is rounding)
2344 defm QSHRUNvvi :NeonI_N2VShR_Narrow<0b1, 0b10000, "sqshrun">;
2345 defm RSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10001, "rshrn">;
2346 defm QRSHRUNvvi : NeonI_N2VShR_Narrow<0b1, 0b10001, "sqrshrun">;
2347 defm SQSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10010, "sqshrn">;
2348 defm UQSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10010, "uqshrn">;
2349 defm SQRSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10011, "sqrshrn">;
2350 defm UQRSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10011, "uqrshrn">;
// PatFrags that assemble a 128-bit vector from two 64-bit halves
// (concat_vectors), used to match the "2" high-half instructions.
2352 def Neon_combine_2D : PatFrag<(ops node:$Rm, node:$Rn),
2353 (v2i64 (concat_vectors (v1i64 node:$Rm),
2354 (v1i64 node:$Rn)))>;
2355 def Neon_combine_8H : PatFrag<(ops node:$Rm, node:$Rn),
2356 (v8i16 (concat_vectors (v4i16 node:$Rm),
2357 (v4i16 node:$Rn)))>;
2358 def Neon_combine_4S : PatFrag<(ops node:$Rm, node:$Rn),
2359 (v4i32 (concat_vectors (v2i32 node:$Rm),
2360 (v2i32 node:$Rn)))>;
2361 def Neon_combine_4f : PatFrag<(ops node:$Rm, node:$Rn),
2362 (v4f32 (concat_vectors (v2f32 node:$Rm),
2363 (v2f32 node:$Rn)))>;
2364 def Neon_combine_2d : PatFrag<(ops node:$Rm, node:$Rn),
2365 (v2f64 (concat_vectors (v1f64 node:$Rm),
2366 (v1f64 node:$Rn)))>;
// PatFrags matching srl/sra of a vector by a splatted i32 amount; these
// feed the narrowing-shift patterns below.
2368 def Neon_lshrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
2369 (v8i16 (srl (v8i16 node:$lhs),
2370 (v8i16 (Neon_vdup (i32 node:$rhs)))))>;
2371 def Neon_lshrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
2372 (v4i32 (srl (v4i32 node:$lhs),
2373 (v4i32 (Neon_vdup (i32 node:$rhs)))))>;
2374 def Neon_lshrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
2375 (v2i64 (srl (v2i64 node:$lhs),
2376 (v2i64 (Neon_vdup (i32 node:$rhs)))))>;
2377 def Neon_ashrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
2378 (v8i16 (sra (v8i16 node:$lhs),
2379 (v8i16 (Neon_vdup (i32 node:$rhs)))))>;
2380 def Neon_ashrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
2381 (v4i32 (sra (v4i32 node:$lhs),
2382 (v4i32 (Neon_vdup (i32 node:$rhs)))))>;
2383 def Neon_ashrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
2384 (v2i64 (sra (v2i64 node:$lhs),
2385 (v2i64 (Neon_vdup (i32 node:$rhs)))))>;
2387 // Normal shift right narrow is matched by IR (srl/sra, trunc, concat_vectors)
// Match (shift-right, trunc) against SHRN, and the concat-with-existing-low
// form against SHRN2; instantiated once for "lshr" and once for "ashr".
2388 multiclass Neon_shiftNarrow_patterns<string shr> {
2389 def : Pat<(v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H") VPR128:$Rn,
2390 (i32 shr_imm8:$Imm)))),
2391 (SHRNvvi_8B VPR128:$Rn, imm:$Imm)>;
2392 def : Pat<(v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S") VPR128:$Rn,
2393 (i32 shr_imm16:$Imm)))),
2394 (SHRNvvi_4H VPR128:$Rn, imm:$Imm)>;
2395 def : Pat<(v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D") VPR128:$Rn,
2396 (i32 shr_imm32:$Imm)))),
2397 (SHRNvvi_2S VPR128:$Rn, imm:$Imm)>;
// SHRN2 forms: the untouched low half ($src) is combined with the narrowed
// bits; $src is moved into the full register with SUBREG_TO_REG first.
2399 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
2400 (v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H")
2401 VPR128:$Rn, (i32 shr_imm8:$Imm))))))),
2402 (SHRNvvi_16B (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
2403 VPR128:$Rn, imm:$Imm)>;
2404 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
2405 (v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S")
2406 VPR128:$Rn, (i32 shr_imm16:$Imm))))))),
2407 (SHRNvvi_8H (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2408 VPR128:$Rn, imm:$Imm)>;
2409 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
2410 (v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D")
2411 VPR128:$Rn, (i32 shr_imm32:$Imm))))))),
2412 (SHRNvvi_4S (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2413 VPR128:$Rn, imm:$Imm)>;
// Same idea as Neon_shiftNarrow_patterns, but for the saturating/rounding
// narrowing intrinsics: op is the intrinsic, prefix the instruction stem.
2416 multiclass Neon_shiftNarrow_QR_patterns<SDPatternOperator op, string prefix> {
2417 def : Pat<(v8i8 (op (v8i16 VPR128:$Rn), shr_imm8:$Imm)),
2418 (!cast<Instruction>(prefix # "_8B") VPR128:$Rn, imm:$Imm)>;
2419 def : Pat<(v4i16 (op (v4i32 VPR128:$Rn), shr_imm16:$Imm)),
2420 (!cast<Instruction>(prefix # "_4H") VPR128:$Rn, imm:$Imm)>;
2421 def : Pat<(v2i32 (op (v2i64 VPR128:$Rn), shr_imm32:$Imm)),
2422 (!cast<Instruction>(prefix # "_2S") VPR128:$Rn, imm:$Imm)>;
2424 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
2425 (v1i64 (bitconvert (v8i8
2426 (op (v8i16 VPR128:$Rn), shr_imm8:$Imm))))),
2427 (!cast<Instruction>(prefix # "_16B")
2428 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2429 VPR128:$Rn, imm:$Imm)>;
2430 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
2431 (v1i64 (bitconvert (v4i16
2432 (op (v4i32 VPR128:$Rn), shr_imm16:$Imm))))),
2433 (!cast<Instruction>(prefix # "_8H")
2434 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2435 VPR128:$Rn, imm:$Imm)>;
2436 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
2437 (v1i64 (bitconvert (v2i32
2438 (op (v2i64 VPR128:$Rn), shr_imm32:$Imm))))),
2439 (!cast<Instruction>(prefix # "_4S")
2440 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2441 VPR128:$Rn, imm:$Imm)>;
// Instantiate the narrowing-shift pattern sets for each instruction family.
2444 defm : Neon_shiftNarrow_patterns<"lshr">;
2445 defm : Neon_shiftNarrow_patterns<"ashr">;
2447 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrun, "QSHRUNvvi">;
2448 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vrshrn, "RSHRNvvi">;
2449 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrun, "QRSHRUNvvi">;
2450 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrn, "SQSHRNvvi">;
2451 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqshrn, "UQSHRNvvi">;
2452 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrn, "SQRSHRNvvi">;
2453 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqrshrn, "UQRSHRNvvi">;
2455 // Convert between fixed-point and floating-point
// Fixed-point <-> floating-point conversion with an immediate number of
// fractional bits; IntOp is the conversion intrinsic.
2456 class N2VCvt_Fx<bit q, bit u, bits<5> opcode, string asmop, string T,
2457 RegisterOperand VPRC, ValueType DestTy, ValueType SrcTy,
2458 Operand ImmTy, SDPatternOperator IntOp>
2459 : NeonI_2VShiftImm<q, u, opcode,
2460 (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
2461 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
2462 [(set (DestTy VPRC:$Rd), (DestTy (IntOp (SrcTy VPRC:$Rn),
2463 (i32 ImmTy:$Imm))))],
// Fixed-point -> floating-point conversions (SCVTF/UCVTF with #fbits).
2466 multiclass NeonI_N2VCvt_Fx2fp<bit u, bits<5> opcode, string asmop,
2467 SDPatternOperator IntOp> {
2468 def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2f32, v2i32,
2470 let Inst{22-21} = 0b01;
2473 def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4f32, v4i32,
2475 let Inst{22-21} = 0b01;
2478 def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2f64, v2i64,
// Floating-point -> fixed-point conversions (FCVTZS/FCVTZU with #fbits).
2484 multiclass NeonI_N2VCvt_Fp2fx<bit u, bits<5> opcode, string asmop,
2485 SDPatternOperator IntOp> {
2486 def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2i32, v2f32,
2488 let Inst{22-21} = 0b01;
2491 def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4i32, v4f32,
2493 let Inst{22-21} = 0b01;
2496 def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2i64, v2f64,
2502 // Convert fixed-point to floating-point
2503 defm VCVTxs2f : NeonI_N2VCvt_Fx2fp<0, 0b11100, "scvtf",
2504 int_arm_neon_vcvtfxs2fp>;
2505 defm VCVTxu2f : NeonI_N2VCvt_Fx2fp<1, 0b11100, "ucvtf",
2506 int_arm_neon_vcvtfxu2fp>;
2508 // Convert floating-point to fixed-point
2509 defm VCVTf2xs : NeonI_N2VCvt_Fp2fx<0, 0b11111, "fcvtzs",
2510 int_arm_neon_vcvtfp2fxs>;
2511 defm VCVTf2xu : NeonI_N2VCvt_Fp2fx<1, 0b11111, "fcvtzu",
2512 int_arm_neon_vcvtfp2fxu>;
// PatFrags matching an extension (sext/zext) of the high half of a
// 128-bit vector; used to recognize *2 (second-part) widening operations.
2514 multiclass Neon_sshll2_0<SDNode ext>
2516 def _v8i8 : PatFrag<(ops node:$Rn),
2517 (v8i16 (ext (v8i8 (Neon_High16B node:$Rn))))>;
2518 def _v4i16 : PatFrag<(ops node:$Rn),
2519 (v4i32 (ext (v4i16 (Neon_High8H node:$Rn))))>;
2520 def _v2i32 : PatFrag<(ops node:$Rn),
2521 (v2i64 (ext (v2i32 (Neon_High4S node:$Rn))))>;
2524 defm NI_sext_high : Neon_sshll2_0<sext>;
2525 defm NI_zext_high : Neon_sshll2_0<zext>;
2528 //===----------------------------------------------------------------------===//
2529 // Multiclasses for NeonI_Across
2530 //===----------------------------------------------------------------------===//
// Across-lanes reductions with a widened (long) scalar result, e.g.
// SADDLV/UADDLV: result element is twice the source element width.
2534 multiclass NeonI_2VAcross_1<bit u, bits<5> opcode,
2535 string asmop, SDPatternOperator opnode>
2537 def _1h8b: NeonI_2VAcross<0b0, u, 0b00, opcode,
2538 (outs FPR16:$Rd), (ins VPR64:$Rn),
2539 asmop # "\t$Rd, $Rn.8b",
2540 [(set (v1i16 FPR16:$Rd),
2541 (v1i16 (opnode (v8i8 VPR64:$Rn))))],
2544 def _1h16b: NeonI_2VAcross<0b1, u, 0b00, opcode,
2545 (outs FPR16:$Rd), (ins VPR128:$Rn),
2546 asmop # "\t$Rd, $Rn.16b",
2547 [(set (v1i16 FPR16:$Rd),
2548 (v1i16 (opnode (v16i8 VPR128:$Rn))))],
2551 def _1s4h: NeonI_2VAcross<0b0, u, 0b01, opcode,
2552 (outs FPR32:$Rd), (ins VPR64:$Rn),
2553 asmop # "\t$Rd, $Rn.4h",
2554 [(set (v1i32 FPR32:$Rd),
2555 (v1i32 (opnode (v4i16 VPR64:$Rn))))],
2558 def _1s8h: NeonI_2VAcross<0b1, u, 0b01, opcode,
2559 (outs FPR32:$Rd), (ins VPR128:$Rn),
2560 asmop # "\t$Rd, $Rn.8h",
2561 [(set (v1i32 FPR32:$Rd),
2562 (v1i32 (opnode (v8i16 VPR128:$Rn))))],
2565 // _1d2s doesn't exist!
2567 def _1d4s: NeonI_2VAcross<0b1, u, 0b10, opcode,
2568 (outs FPR64:$Rd), (ins VPR128:$Rn),
2569 asmop # "\t$Rd, $Rn.4s",
2570 [(set (v1i64 FPR64:$Rd),
2571 (v1i64 (opnode (v4i32 VPR128:$Rn))))],
2575 defm SADDLV : NeonI_2VAcross_1<0b0, 0b00011, "saddlv", int_aarch64_neon_saddlv>;
2576 defm UADDLV : NeonI_2VAcross_1<0b1, 0b00011, "uaddlv", int_aarch64_neon_uaddlv>;
// Across-lanes reductions whose scalar result keeps the source element
// width, e.g. SMAXV/UMAXV/SMINV/UMINV/ADDV.
2580 multiclass NeonI_2VAcross_2<bit u, bits<5> opcode,
2581 string asmop, SDPatternOperator opnode>
2583 def _1b8b: NeonI_2VAcross<0b0, u, 0b00, opcode,
2584 (outs FPR8:$Rd), (ins VPR64:$Rn),
2585 asmop # "\t$Rd, $Rn.8b",
2586 [(set (v1i8 FPR8:$Rd),
2587 (v1i8 (opnode (v8i8 VPR64:$Rn))))],
2590 def _1b16b: NeonI_2VAcross<0b1, u, 0b00, opcode,
2591 (outs FPR8:$Rd), (ins VPR128:$Rn),
2592 asmop # "\t$Rd, $Rn.16b",
2593 [(set (v1i8 FPR8:$Rd),
2594 (v1i8 (opnode (v16i8 VPR128:$Rn))))],
2597 def _1h4h: NeonI_2VAcross<0b0, u, 0b01, opcode,
2598 (outs FPR16:$Rd), (ins VPR64:$Rn),
2599 asmop # "\t$Rd, $Rn.4h",
2600 [(set (v1i16 FPR16:$Rd),
2601 (v1i16 (opnode (v4i16 VPR64:$Rn))))],
2604 def _1h8h: NeonI_2VAcross<0b1, u, 0b01, opcode,
2605 (outs FPR16:$Rd), (ins VPR128:$Rn),
2606 asmop # "\t$Rd, $Rn.8h",
2607 [(set (v1i16 FPR16:$Rd),
2608 (v1i16 (opnode (v8i16 VPR128:$Rn))))],
2611 // _1s2s doesn't exist!
2613 def _1s4s: NeonI_2VAcross<0b1, u, 0b10, opcode,
2614 (outs FPR32:$Rd), (ins VPR128:$Rn),
2615 asmop # "\t$Rd, $Rn.4s",
2616 [(set (v1i32 FPR32:$Rd),
2617 (v1i32 (opnode (v4i32 VPR128:$Rn))))],
2621 defm SMAXV : NeonI_2VAcross_2<0b0, 0b01010, "smaxv", int_aarch64_neon_smaxv>;
2622 defm UMAXV : NeonI_2VAcross_2<0b1, 0b01010, "umaxv", int_aarch64_neon_umaxv>;
2624 defm SMINV : NeonI_2VAcross_2<0b0, 0b11010, "sminv", int_aarch64_neon_sminv>;
2625 defm UMINV : NeonI_2VAcross_2<0b1, 0b11010, "uminv", int_aarch64_neon_uminv>;
2627 defm ADDV : NeonI_2VAcross_2<0b0, 0b11011, "addv", int_aarch64_neon_vaddv>;
// Floating-point across-lanes reductions; only the 4S arrangement exists
// (FMAXNMV/FMINNMV/FMAXV/FMINV take a v4f32 source, f32 result).
2631 multiclass NeonI_2VAcross_3<bit u, bits<5> opcode, bits<2> size,
2632 string asmop, SDPatternOperator opnode> {
2633 def _1s4s: NeonI_2VAcross<0b1, u, size, opcode,
2634 (outs FPR32:$Rd), (ins VPR128:$Rn),
2635 asmop # "\t$Rd, $Rn.4s",
2636 [(set (f32 FPR32:$Rd),
2637 (f32 (opnode (v4f32 VPR128:$Rn))))],
2641 defm FMAXNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b00, "fmaxnmv",
2642 int_aarch64_neon_vmaxnmv>;
2643 defm FMINNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b10, "fminnmv",
2644 int_aarch64_neon_vminnmv>;
2646 defm FMAXV : NeonI_2VAcross_3<0b1, 0b01111, 0b00, "fmaxv",
2647 int_aarch64_neon_vmaxv>;
2648 defm FMINV : NeonI_2VAcross_3<0b1, 0b01111, 0b10, "fminv",
2649 int_aarch64_neon_vminv>;
2651 // The following definitions are for the instruction class (Perm).
// Base class for permute instructions (uzp/zip/trn): two same-sized vector
// operands produce one result of the same arrangement OpS.
2653 class NeonI_Permute<bit q, bits<2> size, bits<3> opcode,
2654 string asmop, RegisterOperand OpVPR, string OpS,
2655 SDPatternOperator opnode, ValueType Ty>
2656 : NeonI_Perm<q, size, opcode,
2657 (outs OpVPR:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2658 asmop # "\t$Rd." # OpS # ", $Rn." # OpS # ", $Rm." # OpS,
2659 [(set (Ty OpVPR:$Rd),
2660 (Ty (opnode (Ty OpVPR:$Rn), (Ty OpVPR:$Rm))))],
// Instantiate a permute instruction for every integer arrangement.
// Note there is no _1d form; 64-bit lanes only exist in the 128-bit (2d)
// variant.
2663 multiclass NeonI_Perm_pat<bits<3> opcode, string asmop,
2664 SDPatternOperator opnode> {
2665 def _8b : NeonI_Permute<0b0, 0b00, opcode, asmop,
2666 VPR64, "8b", opnode, v8i8>;
2667 def _16b : NeonI_Permute<0b1, 0b00, opcode, asmop,
2668 VPR128, "16b",opnode, v16i8>;
2669 def _4h : NeonI_Permute<0b0, 0b01, opcode, asmop,
2670 VPR64, "4h", opnode, v4i16>;
2671 def _8h : NeonI_Permute<0b1, 0b01, opcode, asmop,
2672 VPR128, "8h", opnode, v8i16>;
2673 def _2s : NeonI_Permute<0b0, 0b10, opcode, asmop,
2674 VPR64, "2s", opnode, v2i32>;
2675 def _4s : NeonI_Permute<0b1, 0b10, opcode, asmop,
2676 VPR128, "4s", opnode, v4i32>;
2677 def _2d : NeonI_Permute<0b1, 0b11, opcode, asmop,
2678 VPR128, "2d", opnode, v2i64>;
// The six permute operations, selected by the 3-bit opcode field.
2681 defm UZP1vvv : NeonI_Perm_pat<0b001, "uzp1", Neon_uzp1>;
2682 defm TRN1vvv : NeonI_Perm_pat<0b010, "trn1", Neon_trn1>;
2683 defm ZIP1vvv : NeonI_Perm_pat<0b011, "zip1", Neon_zip1>;
2684 defm UZP2vvv : NeonI_Perm_pat<0b101, "uzp2", Neon_uzp2>;
2685 defm TRN2vvv : NeonI_Perm_pat<0b110, "trn2", Neon_trn2>;
2686 defm ZIP2vvv : NeonI_Perm_pat<0b111, "zip2", Neon_zip2>;
// Map floating-point permutes onto the integer permute instructions of the
// same arrangement (the instructions are type-agnostic lane shuffles).
2688 multiclass NeonI_Perm_float_pat<string INS, SDPatternOperator opnode> {
2689 def : Pat<(v2f32 (opnode (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))),
2690 (!cast<Instruction>(INS # "_2s") VPR64:$Rn, VPR64:$Rm)>;
2692 def : Pat<(v4f32 (opnode (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))),
2693 (!cast<Instruction>(INS # "_4s") VPR128:$Rn, VPR128:$Rm)>;
2695 def : Pat<(v2f64 (opnode (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))),
2696 (!cast<Instruction>(INS # "_2d") VPR128:$Rn, VPR128:$Rm)>;
2699 defm : NeonI_Perm_float_pat<"UZP1vvv", Neon_uzp1>;
2700 defm : NeonI_Perm_float_pat<"UZP2vvv", Neon_uzp2>;
2701 defm : NeonI_Perm_float_pat<"ZIP1vvv", Neon_zip1>;
2702 defm : NeonI_Perm_float_pat<"ZIP2vvv", Neon_zip2>;
2703 defm : NeonI_Perm_float_pat<"TRN1vvv", Neon_trn1>;
2704 defm : NeonI_Perm_float_pat<"TRN2vvv", Neon_trn2>;
2706 // The following definitions are for the instruction class (3V Diff).
2708 // normal long/long2 pattern
// Long operation: both narrow operands are widened via `ext` (sext/zext or
// a *_high PatFrag) before `opnode` combines them into a full 128-bit result.
2709 class NeonI_3VDL<bit q, bit u, bits<2> size, bits<4> opcode,
2710 string asmop, string ResS, string OpS,
2711 SDPatternOperator opnode, SDPatternOperator ext,
2712 RegisterOperand OpVPR,
2713 ValueType ResTy, ValueType OpTy>
2714 : NeonI_3VDiff<q, u, size, opcode,
2715 (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2716 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2717 [(set (ResTy VPR128:$Rd),
2718 (ResTy (opnode (ResTy (ext (OpTy OpVPR:$Rn))),
2719 (ResTy (ext (OpTy OpVPR:$Rm))))))],
// Signed long variants: operands are the low 64-bit halves, sign-extended.
2722 multiclass NeonI_3VDL_s<bit u, bits<4> opcode,
2723 string asmop, SDPatternOperator opnode,
2724 bit Commutable = 0> {
2725 let isCommutable = Commutable in {
2726 def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2727 opnode, sext, VPR64, v8i16, v8i8>;
2728 def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2729 opnode, sext, VPR64, v4i32, v4i16>;
2730 def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2731 opnode, sext, VPR64, v2i64, v2i32>;
// Signed long2 variants: operands are the high halves of 128-bit vectors,
// extracted and sign-extended by the NI_sext_high_* PatFrags.
2735 multiclass NeonI_3VDL2_s<bit u, bits<4> opcode, string asmop,
2736 SDPatternOperator opnode, bit Commutable = 0> {
2737 let isCommutable = Commutable in {
2738 def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2739 opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>;
2740 def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2741 opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>;
2742 def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2743 opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>;
// Unsigned long variants (zero extension).
2747 multiclass NeonI_3VDL_u<bit u, bits<4> opcode, string asmop,
2748 SDPatternOperator opnode, bit Commutable = 0> {
2749 let isCommutable = Commutable in {
2750 def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2751 opnode, zext, VPR64, v8i16, v8i8>;
2752 def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2753 opnode, zext, VPR64, v4i32, v4i16>;
2754 def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2755 opnode, zext, VPR64, v2i64, v2i32>;
// Unsigned long2 variants (zero extension of the high halves).
2759 multiclass NeonI_3VDL2_u<bit u, bits<4> opcode, string asmop,
2760 SDPatternOperator opnode, bit Commutable = 0> {
2761 let isCommutable = Commutable in {
2762 def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2763 opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>;
2764 def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2765 opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>;
2766 def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2767 opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>;
// Add-long/subtract-long instructions. Adds are commutable; subs are not.
2771 defm SADDLvvv : NeonI_3VDL_s<0b0, 0b0000, "saddl", add, 1>;
2772 defm UADDLvvv : NeonI_3VDL_u<0b1, 0b0000, "uaddl", add, 1>;
2774 defm SADDL2vvv : NeonI_3VDL2_s<0b0, 0b0000, "saddl2", add, 1>;
2775 defm UADDL2vvv : NeonI_3VDL2_u<0b1, 0b0000, "uaddl2", add, 1>;
2777 defm SSUBLvvv : NeonI_3VDL_s<0b0, 0b0010, "ssubl", sub, 0>;
2778 defm USUBLvvv : NeonI_3VDL_u<0b1, 0b0010, "usubl", sub, 0>;
2780 defm SSUBL2vvv : NeonI_3VDL2_s<0b0, 0b0010, "ssubl2", sub, 0>;
2781 defm USUBL2vvv : NeonI_3VDL2_u<0b1, 0b0010, "usubl2", sub, 0>;
2783 // normal wide/wide2 pattern
// Wide operation: the first operand is already wide (128-bit); only the
// second, narrow operand is widened via `ext` before `opnode` is applied.
2784 class NeonI_3VDW<bit q, bit u, bits<2> size, bits<4> opcode,
2785 string asmop, string ResS, string OpS,
2786 SDPatternOperator opnode, SDPatternOperator ext,
2787 RegisterOperand OpVPR,
2788 ValueType ResTy, ValueType OpTy>
2789 : NeonI_3VDiff<q, u, size, opcode,
2790 (outs VPR128:$Rd), (ins VPR128:$Rn, OpVPR:$Rm),
2791 asmop # "\t$Rd." # ResS # ", $Rn." # ResS # ", $Rm." # OpS,
2792 [(set (ResTy VPR128:$Rd),
2793 (ResTy (opnode (ResTy VPR128:$Rn),
2794 (ResTy (ext (OpTy OpVPR:$Rm))))))],
// Signed wide variants: $Rm is the low half, sign-extended.
2797 multiclass NeonI_3VDW_s<bit u, bits<4> opcode, string asmop,
2798 SDPatternOperator opnode> {
2799 def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2800 opnode, sext, VPR64, v8i16, v8i8>;
2801 def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2802 opnode, sext, VPR64, v4i32, v4i16>;
2803 def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2804 opnode, sext, VPR64, v2i64, v2i32>;
2807 defm SADDWvvv : NeonI_3VDW_s<0b0, 0b0001, "saddw", add>;
2808 defm SSUBWvvv : NeonI_3VDW_s<0b0, 0b0011, "ssubw", sub>;
// Signed wide2 variants: $Rm is the high half of a 128-bit register.
2810 multiclass NeonI_3VDW2_s<bit u, bits<4> opcode, string asmop,
2811 SDPatternOperator opnode> {
2812 def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2813 opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>;
2814 def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2815 opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>;
2816 def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2817 opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>;
2820 defm SADDW2vvv : NeonI_3VDW2_s<0b0, 0b0001, "saddw2", add>;
2821 defm SSUBW2vvv : NeonI_3VDW2_s<0b0, 0b0011, "ssubw2", sub>;
// Unsigned wide variants (zero extension).
2823 multiclass NeonI_3VDW_u<bit u, bits<4> opcode, string asmop,
2824 SDPatternOperator opnode> {
2825 def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2826 opnode, zext, VPR64, v8i16, v8i8>;
2827 def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2828 opnode, zext, VPR64, v4i32, v4i16>;
2829 def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2830 opnode, zext, VPR64, v2i64, v2i32>;
2833 defm UADDWvvv : NeonI_3VDW_u<0b1, 0b0001, "uaddw", add>;
2834 defm USUBWvvv : NeonI_3VDW_u<0b1, 0b0011, "usubw", sub>;
// Unsigned wide2 variants (zero extension of the high half).
2836 multiclass NeonI_3VDW2_u<bit u, bits<4> opcode, string asmop,
2837 SDPatternOperator opnode> {
2838 def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2839 opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>;
2840 def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2841 opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>;
2842 def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2843 opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>;
2846 defm UADDW2vvv : NeonI_3VDW2_u<0b1, 0b0001, "uaddw2", add>;
2847 defm USUBW2vvv : NeonI_3VDW2_u<0b1, 0b0011, "usubw2", sub>;
2849 // Get the high half part of the vector element.
// Extract the high half of each element: shift every lane right by half the
// element width, then truncate to the narrower lane type. Used to match the
// "high narrow" behavior of addhn/subhn.
2850 multiclass NeonI_get_high {
2851 def _8h : PatFrag<(ops node:$Rn),
2852 (v8i8 (trunc (v8i16 (srl (v8i16 node:$Rn),
2853 (v8i16 (Neon_vdup (i32 8)))))))>;
2854 def _4s : PatFrag<(ops node:$Rn),
2855 (v4i16 (trunc (v4i32 (srl (v4i32 node:$Rn),
2856 (v4i32 (Neon_vdup (i32 16)))))))>;
2857 def _2d : PatFrag<(ops node:$Rn),
2858 (v2i32 (trunc (v2i64 (srl (v2i64 node:$Rn),
2859 (v2i64 (Neon_vdup (i32 32)))))))>;
2862 defm NI_get_hi : NeonI_get_high;
2864 // pattern for addhn/subhn with 2 operands
// Narrowing add/sub returning the high half of each wide element:
// opnode (add/sub) runs at the wide type, then get_hi narrows to the
// 64-bit destination.
2865 class NeonI_3VDN_addhn_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
2866 string asmop, string ResS, string OpS,
2867 SDPatternOperator opnode, SDPatternOperator get_hi,
2868 ValueType ResTy, ValueType OpTy>
2869 : NeonI_3VDiff<q, u, size, opcode,
2870 (outs VPR64:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
2871 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2872 [(set (ResTy VPR64:$Rd),
2874 (OpTy (opnode (OpTy VPR128:$Rn),
2875 (OpTy VPR128:$Rm))))))],
2878 multiclass NeonI_3VDN_addhn_2Op<bit u, bits<4> opcode, string asmop,
2879 SDPatternOperator opnode, bit Commutable = 0> {
2880 let isCommutable = Commutable in {
2881 def _8b8h : NeonI_3VDN_addhn_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h",
2882 opnode, NI_get_hi_8h, v8i8, v8i16>;
2883 def _4h4s : NeonI_3VDN_addhn_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s",
2884 opnode, NI_get_hi_4s, v4i16, v4i32>;
2885 def _2s2d : NeonI_3VDN_addhn_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d",
2886 opnode, NI_get_hi_2d, v2i32, v2i64>;
2890 defm ADDHNvvv : NeonI_3VDN_addhn_2Op<0b0, 0b0100, "addhn", add, 1>;
2891 defm SUBHNvvv : NeonI_3VDN_addhn_2Op<0b0, 0b0110, "subhn", sub, 0>;
2893 // pattern for operation with 2 operands
// Generic two-operand 3V-Diff instruction: `opnode` is applied directly to
// the two operands with no extension step. Reused by narrow (raddhn/rsubhn)
// and long (smull/umull, sqdmull, pmull) instantiations below.
2894 class NeonI_3VD_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
2895 string asmop, string ResS, string OpS,
2896 SDPatternOperator opnode,
2897 RegisterOperand ResVPR, RegisterOperand OpVPR,
2898 ValueType ResTy, ValueType OpTy>
2899 : NeonI_3VDiff<q, u, size, opcode,
2900 (outs ResVPR:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2901 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2902 [(set (ResTy ResVPR:$Rd),
2903 (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))],
2906 // normal narrow pattern
// Rounding narrowing add/sub: matched via the ARM intrinsics since the
// rounding step has no plain DAG-node equivalent.
2907 multiclass NeonI_3VDN_2Op<bit u, bits<4> opcode, string asmop,
2908 SDPatternOperator opnode, bit Commutable = 0> {
2909 let isCommutable = Commutable in {
2910 def _8b8h : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h",
2911 opnode, VPR64, VPR128, v8i8, v8i16>;
2912 def _4h4s : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s",
2913 opnode, VPR64, VPR128, v4i16, v4i32>;
2914 def _2s2d : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d",
2915 opnode, VPR64, VPR128, v2i32, v2i64>;
2919 defm RADDHNvvv : NeonI_3VDN_2Op<0b1, 0b0100, "raddhn", int_arm_neon_vraddhn, 1>;
2920 defm RSUBHNvvv : NeonI_3VDN_2Op<0b1, 0b0110, "rsubhn", int_arm_neon_vrsubhn, 0>;
2922 // pattern for acle intrinsic with 3 operands
// Narrowing "high" (…2) instructions: write the narrowed result into the
// upper half of $Rd while preserving the lower half ($src is tied to $Rd).
// Selection patterns are supplied separately (see NarrowHighHalfPat below).
2923 class NeonI_3VDN_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
2924 string asmop, string ResS, string OpS>
2925 : NeonI_3VDiff<q, u, size, opcode,
2926 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn, VPR128:$Rm),
2927 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2929 let Constraints = "$src = $Rd";
2930 let neverHasSideEffects = 1;
2933 multiclass NeonI_3VDN_3Op_v1<bit u, bits<4> opcode, string asmop> {
2934 def _16b8h : NeonI_3VDN_3Op<0b1, u, 0b00, opcode, asmop, "16b", "8h">;
2935 def _8h4s : NeonI_3VDN_3Op<0b1, u, 0b01, opcode, asmop, "8h", "4s">;
2936 def _4s2d : NeonI_3VDN_3Op<0b1, u, 0b10, opcode, asmop, "4s", "2d">;
2939 defm ADDHN2vvv : NeonI_3VDN_3Op_v1<0b0, 0b0100, "addhn2">;
2940 defm SUBHN2vvv : NeonI_3VDN_3Op_v1<0b0, 0b0110, "subhn2">;
2942 defm RADDHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0100, "raddhn2">;
2943 defm RSUBHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0110, "rsubhn2">;
2945 // Patterns have to be separate because there's a SUBREG_TO_REG in the output
// Match "narrow into the high half": combine the preserved low half ($src)
// with the narrowed core operation, mapping onto the tied-operand …HN2
// instruction. SUBREG_TO_REG widens the 64-bit $src into the 128-bit tied
// input.
2947 class NarrowHighHalfPat<Instruction INST, ValueType DstTy, ValueType SrcTy,
2948 SDPatternOperator coreop>
2949 : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
2950 (v1i64 (bitconvert (DstTy (coreop (SrcTy VPR128:$Rn),
2951 (SrcTy VPR128:$Rm)))))),
2952 (INST (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2953 VPR128:$Rn, VPR128:$Rm)>;
// addhn2: high half of (Rn + Rm), narrowed, written to the top of Rd.
2956 def : NarrowHighHalfPat<ADDHN2vvv_16b8h, v8i8, v8i16,
2957 BinOpFrag<(NI_get_hi_8h (add node:$LHS, node:$RHS))>>;
2958 def : NarrowHighHalfPat<ADDHN2vvv_8h4s, v4i16, v4i32,
2959 BinOpFrag<(NI_get_hi_4s (add node:$LHS, node:$RHS))>>;
2960 def : NarrowHighHalfPat<ADDHN2vvv_4s2d, v2i32, v2i64,
2961 BinOpFrag<(NI_get_hi_2d (add node:$LHS, node:$RHS))>>;
// subhn2: same shape with subtraction as the core operation.
2964 def : NarrowHighHalfPat<SUBHN2vvv_16b8h, v8i8, v8i16,
2965 BinOpFrag<(NI_get_hi_8h (sub node:$LHS, node:$RHS))>>;
2966 def : NarrowHighHalfPat<SUBHN2vvv_8h4s, v4i16, v4i32,
2967 BinOpFrag<(NI_get_hi_4s (sub node:$LHS, node:$RHS))>>;
2968 def : NarrowHighHalfPat<SUBHN2vvv_4s2d, v2i32, v2i64,
2969 BinOpFrag<(NI_get_hi_2d (sub node:$LHS, node:$RHS))>>;
// raddhn2/rsubhn2: rounding variants matched through the ARM intrinsics.
2972 def : NarrowHighHalfPat<RADDHN2vvv_16b8h, v8i8, v8i16, int_arm_neon_vraddhn>;
2973 def : NarrowHighHalfPat<RADDHN2vvv_8h4s, v4i16, v4i32, int_arm_neon_vraddhn>;
2974 def : NarrowHighHalfPat<RADDHN2vvv_4s2d, v2i32, v2i64, int_arm_neon_vraddhn>;
2977 def : NarrowHighHalfPat<RSUBHN2vvv_16b8h, v8i8, v8i16, int_arm_neon_vrsubhn>;
2978 def : NarrowHighHalfPat<RSUBHN2vvv_8h4s, v4i16, v4i32, int_arm_neon_vrsubhn>;
2979 def : NarrowHighHalfPat<RSUBHN2vvv_4s2d, v2i32, v2i64, int_arm_neon_vrsubhn>;
2981 // Patterns that need to extend the result.
// Long operation whose narrow result must then be zero-extended to the wide
// destination type (the abd intrinsics produce a narrow result).
2982 class NeonI_3VDL_Ext<bit q, bit u, bits<2> size, bits<4> opcode,
2983 string asmop, string ResS, string OpS,
2984 SDPatternOperator opnode,
2985 RegisterOperand OpVPR,
2986 ValueType ResTy, ValueType OpTy, ValueType OpSTy>
2987 : NeonI_3VDiff<q, u, size, opcode,
2988 (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2989 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2990 [(set (ResTy VPR128:$Rd),
2991 (ResTy (zext (OpSTy (opnode (OpTy OpVPR:$Rn),
2992 (OpTy OpVPR:$Rm))))))],
// Absolute-difference long: |Rn - Rm| widened (zext is correct for both
// signed and unsigned inputs since the difference is already non-negative).
2995 multiclass NeonI_3VDL_zext<bit u, bits<4> opcode, string asmop,
2996 SDPatternOperator opnode, bit Commutable = 0> {
2997 let isCommutable = Commutable in {
2998 def _8h8b : NeonI_3VDL_Ext<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2999 opnode, VPR64, v8i16, v8i8, v8i8>;
3000 def _4s4h : NeonI_3VDL_Ext<0b0, u, 0b01, opcode, asmop, "4s", "4h",
3001 opnode, VPR64, v4i32, v4i16, v4i16>;
3002 def _2d2s : NeonI_3VDL_Ext<0b0, u, 0b10, opcode, asmop, "2d", "2s",
3003 opnode, VPR64, v2i64, v2i32, v2i32>;
3007 defm SABDLvvv : NeonI_3VDL_zext<0b0, 0b0111, "sabdl", int_arm_neon_vabds, 1>;
3008 defm UABDLvvv : NeonI_3VDL_zext<0b1, 0b0111, "uabdl", int_arm_neon_vabdu, 1>;
// Wrap an operation so it applies to the high halves of two 128-bit vectors.
// Instantiated for abd/mull/qdmull/pmull to build the *2 ("high") patterns.
3010 multiclass NeonI_Op_High<SDPatternOperator op> {
3011 def _16B : PatFrag<(ops node:$Rn, node:$Rm),
3012 (op (v8i8 (Neon_High16B node:$Rn)),
3013 (v8i8 (Neon_High16B node:$Rm)))>;
3014 def _8H : PatFrag<(ops node:$Rn, node:$Rm),
3015 (op (v4i16 (Neon_High8H node:$Rn)),
3016 (v4i16 (Neon_High8H node:$Rm)))>;
3017 def _4S : PatFrag<(ops node:$Rn, node:$Rm),
3018 (op (v2i32 (Neon_High4S node:$Rn)),
3019 (v2i32 (Neon_High4S node:$Rm)))>;
3022 defm NI_sabdl_hi : NeonI_Op_High<int_arm_neon_vabds>;
3023 defm NI_uabdl_hi : NeonI_Op_High<int_arm_neon_vabdu>;
3024 defm NI_smull_hi : NeonI_Op_High<int_arm_neon_vmulls>;
3025 defm NI_umull_hi : NeonI_Op_High<int_arm_neon_vmullu>;
3026 defm NI_qdmull_hi : NeonI_Op_High<int_arm_neon_vqdmull>;
3027 defm NI_pmull_hi : NeonI_Op_High<int_arm_neon_vmullp>;
// sabdl2/uabdl2: absolute-difference long on high halves. The opnode is
// passed by name and resolved with !cast per arrangement suffix.
3029 multiclass NeonI_3VDL_Abd_u<bit u, bits<4> opcode, string asmop, string opnode,
3030 bit Commutable = 0> {
3031 let isCommutable = Commutable in {
3032 def _8h8b : NeonI_3VDL_Ext<0b1, u, 0b00, opcode, asmop, "8h", "16b",
3033 !cast<PatFrag>(opnode # "_16B"),
3034 VPR128, v8i16, v16i8, v8i8>;
3035 def _4s4h : NeonI_3VDL_Ext<0b1, u, 0b01, opcode, asmop, "4s", "8h",
3036 !cast<PatFrag>(opnode # "_8H"),
3037 VPR128, v4i32, v8i16, v4i16>;
3038 def _2d2s : NeonI_3VDL_Ext<0b1, u, 0b10, opcode, asmop, "2d", "4s",
3039 !cast<PatFrag>(opnode # "_4S"),
3040 VPR128, v2i64, v4i32, v2i32>;
3044 defm SABDL2vvv : NeonI_3VDL_Abd_u<0b0, 0b0111, "sabdl2", "NI_sabdl_hi", 1>;
3045 defm UABDL2vvv : NeonI_3VDL_Abd_u<0b1, 0b0111, "uabdl2", "NI_uabdl_hi", 1>;
3047 // For patterns that need two operators chained together.
// Absolute-difference-and-accumulate long (saba[l]/uaba[l] family): chain
// subop (the abd), zext to the wide type, then opnode (add) into the
// accumulator $src, which is tied to $Rd.
3048 class NeonI_3VDL_Aba<bit q, bit u, bits<2> size, bits<4> opcode,
3049 string asmop, string ResS, string OpS,
3050 SDPatternOperator opnode, SDPatternOperator subop,
3051 RegisterOperand OpVPR,
3052 ValueType ResTy, ValueType OpTy, ValueType OpSTy>
3053 : NeonI_3VDiff<q, u, size, opcode,
3054 (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
3055 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
3056 [(set (ResTy VPR128:$Rd),
3058 (ResTy VPR128:$src),
3059 (ResTy (zext (OpSTy (subop (OpTy OpVPR:$Rn),
3060 (OpTy OpVPR:$Rm))))))))],
3062 let Constraints = "$src = $Rd";
// sabal/uabal: low-half accumulate variants.
3065 multiclass NeonI_3VDL_Aba_v1<bit u, bits<4> opcode, string asmop,
3066 SDPatternOperator opnode, SDPatternOperator subop>{
3067 def _8h8b : NeonI_3VDL_Aba<0b0, u, 0b00, opcode, asmop, "8h", "8b",
3068 opnode, subop, VPR64, v8i16, v8i8, v8i8>;
3069 def _4s4h : NeonI_3VDL_Aba<0b0, u, 0b01, opcode, asmop, "4s", "4h",
3070 opnode, subop, VPR64, v4i32, v4i16, v4i16>;
3071 def _2d2s : NeonI_3VDL_Aba<0b0, u, 0b10, opcode, asmop, "2d", "2s",
3072 opnode, subop, VPR64, v2i64, v2i32, v2i32>;
3075 defm SABALvvv : NeonI_3VDL_Aba_v1<0b0, 0b0101, "sabal",
3076 add, int_arm_neon_vabds>;
3077 defm UABALvvv : NeonI_3VDL_Aba_v1<0b1, 0b0101, "uabal",
3078 add, int_arm_neon_vabdu>;
// sabal2/uabal2: high-half accumulate variants, subop resolved by name.
3080 multiclass NeonI_3VDL2_Aba_v1<bit u, bits<4> opcode, string asmop,
3081 SDPatternOperator opnode, string subop> {
3082 def _8h8b : NeonI_3VDL_Aba<0b1, u, 0b00, opcode, asmop, "8h", "16b",
3083 opnode, !cast<PatFrag>(subop # "_16B"),
3084 VPR128, v8i16, v16i8, v8i8>;
3085 def _4s4h : NeonI_3VDL_Aba<0b1, u, 0b01, opcode, asmop, "4s", "8h",
3086 opnode, !cast<PatFrag>(subop # "_8H"),
3087 VPR128, v4i32, v8i16, v4i16>;
3088 def _2d2s : NeonI_3VDL_Aba<0b1, u, 0b10, opcode, asmop, "2d", "4s",
3089 opnode, !cast<PatFrag>(subop # "_4S"),
3090 VPR128, v2i64, v4i32, v2i32>;
3093 defm SABAL2vvv : NeonI_3VDL2_Aba_v1<0b0, 0b0101, "sabal2", add,
3095 defm UABAL2vvv : NeonI_3VDL2_Aba_v1<0b1, 0b0101, "uabal2", add,
3098 // Long pattern with 2 operands
// Multiply-long on the low halves (smull/umull), matched through the ARM
// mull intrinsics so no separate extension step is needed.
3099 multiclass NeonI_3VDL_2Op<bit u, bits<4> opcode, string asmop,
3100 SDPatternOperator opnode, bit Commutable = 0> {
3101 let isCommutable = Commutable in {
3102 def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
3103 opnode, VPR128, VPR64, v8i16, v8i8>;
3104 def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
3105 opnode, VPR128, VPR64, v4i32, v4i16>;
3106 def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
3107 opnode, VPR128, VPR64, v2i64, v2i32>;
3111 defm SMULLvvv : NeonI_3VDL_2Op<0b0, 0b1100, "smull", int_arm_neon_vmulls, 1>;
3112 defm UMULLvvv : NeonI_3VDL_2Op<0b1, 0b1100, "umull", int_arm_neon_vmullu, 1>;
// Multiply-long on the high halves (…2 forms): opnode is expected to be an
// NI_*_hi PatFrag that already extracts the high halves.
3114 class NeonI_3VDL2_2Op_mull<bit q, bit u, bits<2> size, bits<4> opcode,
3115 string asmop, string ResS, string OpS,
3116 SDPatternOperator opnode,
3117 ValueType ResTy, ValueType OpTy>
3118 : NeonI_3VDiff<q, u, size, opcode,
3119 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
3120 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
3121 [(set (ResTy VPR128:$Rd),
3122 (ResTy (opnode (OpTy VPR128:$Rn), (OpTy VPR128:$Rm))))],
3125 multiclass NeonI_3VDL2_2Op_mull_v1<bit u, bits<4> opcode, string asmop,
3126 string opnode, bit Commutable = 0> {
3127 let isCommutable = Commutable in {
3128 def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b",
3129 !cast<PatFrag>(opnode # "_16B"),
3131 def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h",
3132 !cast<PatFrag>(opnode # "_8H"),
3134 def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s",
3135 !cast<PatFrag>(opnode # "_4S"),
3140 defm SMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b0, 0b1100, "smull2",
3142 defm UMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b1, 0b1100, "umull2",
3145 // Long pattern with 3 operands
// Three-operand long operation (multiply-accumulate family): $src is the
// accumulator, tied to $Rd; opnode is a PatFrag combining all three.
3146 class NeonI_3VDL_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
3147 string asmop, string ResS, string OpS,
3148 SDPatternOperator opnode,
3149 ValueType ResTy, ValueType OpTy>
3150 : NeonI_3VDiff<q, u, size, opcode,
3151 (outs VPR128:$Rd), (ins VPR128:$src, VPR64:$Rn, VPR64:$Rm),
3152 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
3153 [(set (ResTy VPR128:$Rd),
3155 (ResTy VPR128:$src),
3156 (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))))],
3158 let Constraints = "$src = $Rd";
3161 multiclass NeonI_3VDL_3Op_v1<bit u, bits<4> opcode, string asmop,
3162 SDPatternOperator opnode> {
3163 def _8h8b : NeonI_3VDL_3Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
3164 opnode, v8i16, v8i8>;
3165 def _4s4h : NeonI_3VDL_3Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
3166 opnode, v4i32, v4i16>;
3167 def _2d2s : NeonI_3VDL_3Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
3168 opnode, v2i64, v2i32>;
// mlal/mlsl PatFrags: accumulator +/- (signed/unsigned multiply-long).
3171 def Neon_smlal : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
3173 (int_arm_neon_vmulls node:$Rn, node:$Rm))>;
3175 def Neon_umlal : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
3177 (int_arm_neon_vmullu node:$Rn, node:$Rm))>;
3179 def Neon_smlsl : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
3181 (int_arm_neon_vmulls node:$Rn, node:$Rm))>;
3183 def Neon_umlsl : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
3185 (int_arm_neon_vmullu node:$Rn, node:$Rm))>;
3187 defm SMLALvvv : NeonI_3VDL_3Op_v1<0b0, 0b1000, "smlal", Neon_smlal>;
3188 defm UMLALvvv : NeonI_3VDL_3Op_v1<0b1, 0b1000, "umlal", Neon_umlal>;
3190 defm SMLSLvvv : NeonI_3VDL_3Op_v1<0b0, 0b1010, "smlsl", Neon_smlsl>;
3191 defm UMLSLvvv : NeonI_3VDL_3Op_v1<0b1, 0b1010, "umlsl", Neon_umlsl>;
// Multiply-accumulate long with separable combine step: subop (add/sub or a
// saturating combine) merges the accumulator $src with opnode's product.
// Reused by both the high-half mla/mls forms and the sqdmlal/sqdmlsl forms.
3193 class NeonI_3VDL2_3Op_mlas<bit q, bit u, bits<2> size, bits<4> opcode,
3194 string asmop, string ResS, string OpS,
3195 SDPatternOperator subop, SDPatternOperator opnode,
3196 RegisterOperand OpVPR,
3197 ValueType ResTy, ValueType OpTy>
3198 : NeonI_3VDiff<q, u, size, opcode,
3199 (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
3200 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
3201 [(set (ResTy VPR128:$Rd),
3203 (ResTy VPR128:$src),
3204 (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))))],
3206 let Constraints = "$src = $Rd";
// smlal2/umlal2/smlsl2/umlsl2: high-half multiply, opnode by name.
3209 multiclass NeonI_3VDL2_3Op_mlas_v1<bit u, bits<4> opcode, string asmop,
3210 SDPatternOperator subop, string opnode> {
3211 def _8h16b : NeonI_3VDL2_3Op_mlas<0b1, u, 0b00, opcode, asmop, "8h", "16b",
3212 subop, !cast<PatFrag>(opnode # "_16B"),
3213 VPR128, v8i16, v16i8>;
3214 def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h",
3215 subop, !cast<PatFrag>(opnode # "_8H"),
3216 VPR128, v4i32, v8i16>;
3217 def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s",
3218 subop, !cast<PatFrag>(opnode # "_4S"),
3219 VPR128, v2i64, v4i32>;
3222 defm SMLAL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1000, "smlal2",
3223 add, "NI_smull_hi">;
3224 defm UMLAL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1000, "umlal2",
3225 add, "NI_umull_hi">;
3227 defm SMLSL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1010, "smlsl2",
3228 sub, "NI_smull_hi">;
3229 defm UMLSL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1010, "umlsl2",
3230 sub, "NI_umull_hi">;
// sqdmlal/sqdmlsl (low halves): saturating doubling multiply-long combined
// into the accumulator with saturating add/sub. No 8-bit form exists.
3232 multiclass NeonI_3VDL_qdmlal_3Op_v2<bit u, bits<4> opcode, string asmop,
3233 SDPatternOperator opnode> {
3234 def _4s4h : NeonI_3VDL2_3Op_mlas<0b0, u, 0b01, opcode, asmop, "4s", "4h",
3235 opnode, int_arm_neon_vqdmull,
3236 VPR64, v4i32, v4i16>;
3237 def _2d2s : NeonI_3VDL2_3Op_mlas<0b0, u, 0b10, opcode, asmop, "2d", "2s",
3238 opnode, int_arm_neon_vqdmull,
3239 VPR64, v2i64, v2i32>;
3242 defm SQDMLALvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1001, "sqdmlal",
3243 int_arm_neon_vqadds>;
3244 defm SQDMLSLvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1011, "sqdmlsl",
3245 int_arm_neon_vqsubs>;
// sqdmull (low halves): only 4h and 2s source arrangements exist.
3247 multiclass NeonI_3VDL_v2<bit u, bits<4> opcode, string asmop,
3248 SDPatternOperator opnode, bit Commutable = 0> {
3249 let isCommutable = Commutable in {
3250 def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
3251 opnode, VPR128, VPR64, v4i32, v4i16>;
3252 def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
3253 opnode, VPR128, VPR64, v2i64, v2i32>;
3257 defm SQDMULLvvv : NeonI_3VDL_v2<0b0, 0b1101, "sqdmull",
3258 int_arm_neon_vqdmull, 1>;
// sqdmull2 (high halves): mirrors NeonI_3VDL_v2 with NI_qdmull_hi PatFrags.
3260 multiclass NeonI_3VDL2_2Op_mull_v2<bit u, bits<4> opcode, string asmop,
3261 string opnode, bit Commutable = 0> {
3262 let isCommutable = Commutable in {
3263 def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h",
3264 !cast<PatFrag>(opnode # "_8H"),
3266 def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s",
3267 !cast<PatFrag>(opnode # "_4S"),
3272 defm SQDMULL2vvv : NeonI_3VDL2_2Op_mull_v2<0b0, 0b1101, "sqdmull2",
// sqdmlal2/sqdmlsl2: high-half saturating doubling multiply, combined into
// the accumulator with saturating add/sub.
3275 multiclass NeonI_3VDL2_3Op_qdmlal_v2<bit u, bits<4> opcode, string asmop,
3276 SDPatternOperator opnode> {
3277 def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h",
3278 opnode, NI_qdmull_hi_8H,
3279 VPR128, v4i32, v8i16>;
3280 def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s",
3281 opnode, NI_qdmull_hi_4S,
3282 VPR128, v2i64, v4i32>;
3285 defm SQDMLAL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1001, "sqdmlal2",
3286 int_arm_neon_vqadds>;
3287 defm SQDMLSL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1011, "sqdmlsl2",
3288 int_arm_neon_vqsubs>;
// pmull: polynomial multiply-long. The 8b form uses the NEON intrinsic;
// the 1d form is the crypto-extension 64x64 -> 128-bit polynomial multiply.
3290 multiclass NeonI_3VDL_v3<bit u, bits<4> opcode, string asmop,
3291 SDPatternOperator opnode_8h8b,
3292 SDPatternOperator opnode_1q1d, bit Commutable = 0> {
3293 let isCommutable = Commutable in {
3294 def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
3295 opnode_8h8b, VPR128, VPR64, v8i16, v8i8>;
3297 def _1q1d : NeonI_3VD_2Op<0b0, u, 0b11, opcode, asmop, "1q", "1d",
3298 opnode_1q1d, VPR128, VPR64, v16i8, v1i64>;
3302 defm PMULLvvv : NeonI_3VDL_v3<0b0, 0b1110, "pmull", int_arm_neon_vmullp,
3303 int_aarch64_neon_vmull_p64, 1>;
// pmull2: polynomial multiply-long on high halves. The 1q2d form extracts
// lane 1 of each 2d operand for the 64-bit polynomial multiply; an extra
// Pat matches the extract_subvector formulation of the same operation.
3305 multiclass NeonI_3VDL2_2Op_mull_v3<bit u, bits<4> opcode, string asmop,
3306 string opnode, bit Commutable = 0> {
3307 let isCommutable = Commutable in {
3308 def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b",
3309 !cast<PatFrag>(opnode # "_16B"),
3313 NeonI_3VDiff<0b1, u, 0b11, opcode,
3314 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
3315 asmop # "\t$Rd.1q, $Rn.2d, $Rm.2d",
3316 [(set (v16i8 VPR128:$Rd),
3317 (v16i8 (int_aarch64_neon_vmull_p64
3318 (v1i64 (scalar_to_vector
3319 (i64 (vector_extract (v2i64 VPR128:$Rn), 1)))),
3320 (v1i64 (scalar_to_vector
3321 (i64 (vector_extract (v2i64 VPR128:$Rm), 1)))))))],
3325 def : Pat<(v16i8 (int_aarch64_neon_vmull_p64
3326 (v1i64 (extract_subvector (v2i64 VPR128:$Rn), (i64 1))),
3327 (v1i64 (extract_subvector (v2i64 VPR128:$Rm), (i64 1))))),
3328 (!cast<Instruction>(NAME # "_1q2d") VPR128:$Rn, VPR128:$Rm)>;
3331 defm PMULL2vvv : NeonI_3VDL2_2Op_mull_v3<0b0, 0b1110, "pmull2", "NI_pmull_hi",
3334 // End of implementation for instruction class (3V Diff)
3336 // The followings are vector load/store multiple N-element structure
3337 // (class SIMD lselem).
3339 // ld1: load multiple 1-element structure to 1/2/3/4 registers.
3340 // ld2/ld3/ld4: load multiple N-element structure to N registers (N = 2, 3, 4).
3341 // The structure consists of a sequence of sets of N values.
3342 // The first element of the structure is placed in the first lane
3343 // of the first vector, the second element in the first lane
3344 // of the second vector, and so on.
3345 // E.g. LD1_3V_2S will load 32-bit elements {A, B, C, D, E, F} sequentially into
3346 // the three 64-bit vectors list {BA, DC, FE}.
3347 // E.g. LD3_2S will load 32-bit elements {A, B, C, D, E, F} into the three
3348 // 64-bit vectors list {DA, EB, FC}.
3349 // Store instructions store multiple structure to N registers like load.
// Load a vector-register list from [Rn] (no writeback). VecList selects the
// number of registers and arrangement; marked side-effect free so patterns
// attach separately.
3352 class NeonI_LDVList<bit q, bits<4> opcode, bits<2> size,
3353 RegisterOperand VecList, string asmop>
3354 : NeonI_LdStMult<q, 1, opcode, size,
3355 (outs VecList:$Rt), (ins GPR64xsp:$Rn),
3356 asmop # "\t$Rt, [$Rn]",
3360 let neverHasSideEffects = 1;
// Instantiate a load-list instruction for every arrangement except 1d
// (defined separately where it exists, e.g. LD1_1D).
3363 multiclass LDVList_BHSD<bits<4> opcode, string List, string asmop> {
3364 def _8B : NeonI_LDVList<0, opcode, 0b00,
3365 !cast<RegisterOperand>(List # "8B_operand"), asmop>;
3367 def _4H : NeonI_LDVList<0, opcode, 0b01,
3368 !cast<RegisterOperand>(List # "4H_operand"), asmop>;
3370 def _2S : NeonI_LDVList<0, opcode, 0b10,
3371 !cast<RegisterOperand>(List # "2S_operand"), asmop>;
3373 def _16B : NeonI_LDVList<1, opcode, 0b00,
3374 !cast<RegisterOperand>(List # "16B_operand"), asmop>;
3376 def _8H : NeonI_LDVList<1, opcode, 0b01,
3377 !cast<RegisterOperand>(List # "8H_operand"), asmop>;
3379 def _4S : NeonI_LDVList<1, opcode, 0b10,
3380 !cast<RegisterOperand>(List # "4S_operand"), asmop>;
3382 def _2D : NeonI_LDVList<1, opcode, 0b11,
3383 !cast<RegisterOperand>(List # "2D_operand"), asmop>;
3386 // Load multiple N-element structure to N consecutive registers (N = 1,2,3,4)
// LD1-LD4 structure loads, plus the ld1 multi-register (x2/x3/x4) forms.
// Each opcode value selects the list shape; the 1D variants only exist for
// ld1-style loads and are defined individually.
3387 defm LD1 : LDVList_BHSD<0b0111, "VOne", "ld1">;
3388 def LD1_1D : NeonI_LDVList<0, 0b0111, 0b11, VOne1D_operand, "ld1">;
3390 defm LD2 : LDVList_BHSD<0b1000, "VPair", "ld2">;
3392 defm LD3 : LDVList_BHSD<0b0100, "VTriple", "ld3">;
3394 defm LD4 : LDVList_BHSD<0b0000, "VQuad", "ld4">;
3396 // Load multiple 1-element structure to N consecutive registers (N = 2,3,4)
3397 defm LD1x2 : LDVList_BHSD<0b1010, "VPair", "ld1">;
3398 def LD1x2_1D : NeonI_LDVList<0, 0b1010, 0b11, VPair1D_operand, "ld1">;
3400 defm LD1x3 : LDVList_BHSD<0b0110, "VTriple", "ld1">;
3401 def LD1x3_1D : NeonI_LDVList<0, 0b0110, 0b11, VTriple1D_operand, "ld1">;
3403 defm LD1x4 : LDVList_BHSD<0b0010, "VQuad", "ld1">;
3404 def LD1x4_1D : NeonI_LDVList<0, 0b0010, 0b11, VQuad1D_operand, "ld1">;
// Store a vector-register list to [Rn] (no writeback); mirror of
// NeonI_LDVList with the list as an input operand.
3406 class NeonI_STVList<bit q, bits<4> opcode, bits<2> size,
3407 RegisterOperand VecList, string asmop>
3408 : NeonI_LdStMult<q, 0, opcode, size,
3409 (outs), (ins GPR64xsp:$Rn, VecList:$Rt),
3410 asmop # "\t$Rt, [$Rn]",
3414 let neverHasSideEffects = 1;
// Instantiate a store-list instruction for every arrangement except 1d
// (the 1D variants are defined individually below).
3417 multiclass STVList_BHSD<bits<4> opcode, string List, string asmop> {
3418 def _8B : NeonI_STVList<0, opcode, 0b00,
3419 !cast<RegisterOperand>(List # "8B_operand"), asmop>;
3421 def _4H : NeonI_STVList<0, opcode, 0b01,
3422 !cast<RegisterOperand>(List # "4H_operand"), asmop>;
3424 def _2S : NeonI_STVList<0, opcode, 0b10,
3425 !cast<RegisterOperand>(List # "2S_operand"), asmop>;
3427 def _16B : NeonI_STVList<1, opcode, 0b00,
3428 !cast<RegisterOperand>(List # "16B_operand"), asmop>;
3430 def _8H : NeonI_STVList<1, opcode, 0b01,
3431 !cast<RegisterOperand>(List # "8H_operand"), asmop>;
3433 def _4S : NeonI_STVList<1, opcode, 0b10,
3434 !cast<RegisterOperand>(List # "4S_operand"), asmop>;
3436 def _2D : NeonI_STVList<1, opcode, 0b11,
3437 !cast<RegisterOperand>(List # "2D_operand"), asmop>;
3440 // Store multiple N-element structures from N registers (N = 1,2,3,4)
// Opcode values match the corresponding LD definitions above.
3441 defm ST1 : STVList_BHSD<0b0111, "VOne", "st1">;
3442 def ST1_1D : NeonI_STVList<0, 0b0111, 0b11, VOne1D_operand, "st1">;
3444 defm ST2 : STVList_BHSD<0b1000, "VPair", "st2">;
3446 defm ST3 : STVList_BHSD<0b0100, "VTriple", "st3">;
3448 defm ST4 : STVList_BHSD<0b0000, "VQuad", "st4">;
3450 // Store multiple 1-element structures from N consecutive registers (N = 2,3,4)
3451 defm ST1x2 : STVList_BHSD<0b1010, "VPair", "st1">;
3452 def ST1x2_1D : NeonI_STVList<0, 0b1010, 0b11, VPair1D_operand, "st1">;
3454 defm ST1x3 : STVList_BHSD<0b0110, "VTriple", "st1">;
3455 def ST1x3_1D : NeonI_STVList<0, 0b0110, 0b11, VTriple1D_operand, "st1">;
3457 defm ST1x4 : STVList_BHSD<0b0010, "VQuad", "st1">;
3458 def ST1x4_1D : NeonI_STVList<0, 0b0010, 0b11, VQuad1D_operand, "st1">;
// Select plain vector loads/stores through the one-register LD1/ST1
// multiple-structure instructions, for every legal vector type.
3460 def : Pat<(v2f64 (load GPR64xsp:$addr)), (LD1_2D GPR64xsp:$addr)>;
3461 def : Pat<(v2i64 (load GPR64xsp:$addr)), (LD1_2D GPR64xsp:$addr)>;
3463 def : Pat<(v4f32 (load GPR64xsp:$addr)), (LD1_4S GPR64xsp:$addr)>;
3464 def : Pat<(v4i32 (load GPR64xsp:$addr)), (LD1_4S GPR64xsp:$addr)>;
3466 def : Pat<(v8i16 (load GPR64xsp:$addr)), (LD1_8H GPR64xsp:$addr)>;
3467 def : Pat<(v16i8 (load GPR64xsp:$addr)), (LD1_16B GPR64xsp:$addr)>;
3469 def : Pat<(v1f64 (load GPR64xsp:$addr)), (LD1_1D GPR64xsp:$addr)>;
3470 def : Pat<(v1i64 (load GPR64xsp:$addr)), (LD1_1D GPR64xsp:$addr)>;
3472 def : Pat<(v2f32 (load GPR64xsp:$addr)), (LD1_2S GPR64xsp:$addr)>;
3473 def : Pat<(v2i32 (load GPR64xsp:$addr)), (LD1_2S GPR64xsp:$addr)>;
3475 def : Pat<(v4i16 (load GPR64xsp:$addr)), (LD1_4H GPR64xsp:$addr)>;
3476 def : Pat<(v8i8 (load GPR64xsp:$addr)), (LD1_8B GPR64xsp:$addr)>;
// Stores: note the ST1 instruction takes the address first, the value second.
3478 def : Pat<(store (v2i64 VPR128:$value), GPR64xsp:$addr),
3479 (ST1_2D GPR64xsp:$addr, VPR128:$value)>;
3480 def : Pat<(store (v2f64 VPR128:$value), GPR64xsp:$addr),
3481 (ST1_2D GPR64xsp:$addr, VPR128:$value)>;
3483 def : Pat<(store (v4i32 VPR128:$value), GPR64xsp:$addr),
3484 (ST1_4S GPR64xsp:$addr, VPR128:$value)>;
3485 def : Pat<(store (v4f32 VPR128:$value), GPR64xsp:$addr),
3486 (ST1_4S GPR64xsp:$addr, VPR128:$value)>;
3488 def : Pat<(store (v8i16 VPR128:$value), GPR64xsp:$addr),
3489 (ST1_8H GPR64xsp:$addr, VPR128:$value)>;
3490 def : Pat<(store (v16i8 VPR128:$value), GPR64xsp:$addr),
3491 (ST1_16B GPR64xsp:$addr, VPR128:$value)>;
3493 def : Pat<(store (v1i64 VPR64:$value), GPR64xsp:$addr),
3494 (ST1_1D GPR64xsp:$addr, VPR64:$value)>;
3495 def : Pat<(store (v1f64 VPR64:$value), GPR64xsp:$addr),
3496 (ST1_1D GPR64xsp:$addr, VPR64:$value)>;
3498 def : Pat<(store (v2i32 VPR64:$value), GPR64xsp:$addr),
3499 (ST1_2S GPR64xsp:$addr, VPR64:$value)>;
3500 def : Pat<(store (v2f32 VPR64:$value), GPR64xsp:$addr),
3501 (ST1_2S GPR64xsp:$addr, VPR64:$value)>;
3503 def : Pat<(store (v4i16 VPR64:$value), GPR64xsp:$addr),
3504 (ST1_4H GPR64xsp:$addr, VPR64:$value)>;
3505 def : Pat<(store (v8i8 VPR64:$value), GPR64xsp:$addr),
3506 (ST1_8B GPR64xsp:$addr, VPR64:$value)>;
3508 // Match load/store of v1i8/v1i16/v1i32 type to FPR8/FPR16/FPR32 load/store.
3509 // FIXME: for now we have v1i8, v1i16, v1i32 legal types, if they are illegal,
3510 // these patterns are not needed any more.
3511 def : Pat<(v1i8 (load GPR64xsp:$addr)), (LSFP8_LDR $addr, 0)>;
3512 def : Pat<(v1i16 (load GPR64xsp:$addr)), (LSFP16_LDR $addr, 0)>;
3513 def : Pat<(v1i32 (load GPR64xsp:$addr)), (LSFP32_LDR $addr, 0)>;
3515 def : Pat<(store (v1i8 FPR8:$value), GPR64xsp:$addr),
3516 (LSFP8_STR $value, $addr, 0)>;
3517 def : Pat<(store (v1i16 FPR16:$value), GPR64xsp:$addr),
3518 (LSFP16_STR $value, $addr, 0)>;
3519 def : Pat<(store (v1i32 FPR32:$value), GPR64xsp:$addr),
3520 (LSFP32_STR $value, $addr, 0)>;
3523 // End of vector load/store multiple N-element structure (class SIMD lselem)
3525 // The following are post-index vector load/store multiple N-element
3526 // structure instructions (class SIMD lselem-post)
// AsmOperandClass/Operand pairs that accept exactly one immediate value.
// These model the fixed post-index writeback amounts of the LD/ST multiple-
// and single-structure instructions below (e.g. "ld1 {v0.8b}, [x0], #8"),
// where the immediate must equal the number of bytes transferred.
3527 def exact1_asmoperand : AsmOperandClass {
3528 let Name = "Exact1";
3529 let PredicateMethod = "isExactImm<1>";
3530 let RenderMethod = "addImmOperands";
3532 def uimm_exact1 : Operand<i32>, ImmLeaf<i32, [{return Imm == 1;}]> {
3533 let ParserMatchClass = exact1_asmoperand;
3536 def exact2_asmoperand : AsmOperandClass {
3537 let Name = "Exact2";
3538 let PredicateMethod = "isExactImm<2>";
3539 let RenderMethod = "addImmOperands";
3541 def uimm_exact2 : Operand<i32>, ImmLeaf<i32, [{return Imm == 2;}]> {
3542 let ParserMatchClass = exact2_asmoperand;
3545 def exact3_asmoperand : AsmOperandClass {
3546 let Name = "Exact3";
3547 let PredicateMethod = "isExactImm<3>";
3548 let RenderMethod = "addImmOperands";
3550 def uimm_exact3 : Operand<i32>, ImmLeaf<i32, [{return Imm == 3;}]> {
3551 let ParserMatchClass = exact3_asmoperand;
3554 def exact4_asmoperand : AsmOperandClass {
3555 let Name = "Exact4";
3556 let PredicateMethod = "isExactImm<4>";
3557 let RenderMethod = "addImmOperands";
3559 def uimm_exact4 : Operand<i32>, ImmLeaf<i32, [{return Imm == 4;}]> {
3560 let ParserMatchClass = exact4_asmoperand;
3563 def exact6_asmoperand : AsmOperandClass {
3564 let Name = "Exact6";
3565 let PredicateMethod = "isExactImm<6>";
3566 let RenderMethod = "addImmOperands";
3568 def uimm_exact6 : Operand<i32>, ImmLeaf<i32, [{return Imm == 6;}]> {
3569 let ParserMatchClass = exact6_asmoperand;
3572 def exact8_asmoperand : AsmOperandClass {
3573 let Name = "Exact8";
3574 let PredicateMethod = "isExactImm<8>";
3575 let RenderMethod = "addImmOperands";
3577 def uimm_exact8 : Operand<i32>, ImmLeaf<i32, [{return Imm == 8;}]> {
3578 let ParserMatchClass = exact8_asmoperand;
3581 def exact12_asmoperand : AsmOperandClass {
3582 let Name = "Exact12";
3583 let PredicateMethod = "isExactImm<12>";
3584 let RenderMethod = "addImmOperands";
3586 def uimm_exact12 : Operand<i32>, ImmLeaf<i32, [{return Imm == 12;}]> {
3587 let ParserMatchClass = exact12_asmoperand;
3590 def exact16_asmoperand : AsmOperandClass {
3591 let Name = "Exact16";
3592 let PredicateMethod = "isExactImm<16>";
3593 let RenderMethod = "addImmOperands";
3595 def uimm_exact16 : Operand<i32>, ImmLeaf<i32, [{return Imm == 16;}]> {
3596 let ParserMatchClass = exact16_asmoperand;
3599 def exact24_asmoperand : AsmOperandClass {
3600 let Name = "Exact24";
3601 let PredicateMethod = "isExactImm<24>";
3602 let RenderMethod = "addImmOperands";
3604 def uimm_exact24 : Operand<i32>, ImmLeaf<i32, [{return Imm == 24;}]> {
3605 let ParserMatchClass = exact24_asmoperand;
3608 def exact32_asmoperand : AsmOperandClass {
3609 let Name = "Exact32";
3610 let PredicateMethod = "isExactImm<32>";
3611 let RenderMethod = "addImmOperands";
3613 def uimm_exact32 : Operand<i32>, ImmLeaf<i32, [{return Imm == 32;}]> {
3614 let ParserMatchClass = exact32_asmoperand;
3617 def exact48_asmoperand : AsmOperandClass {
3618 let Name = "Exact48";
3619 let PredicateMethod = "isExactImm<48>";
3620 let RenderMethod = "addImmOperands";
3622 def uimm_exact48 : Operand<i32>, ImmLeaf<i32, [{return Imm == 48;}]> {
3623 let ParserMatchClass = exact48_asmoperand;
3626 def exact64_asmoperand : AsmOperandClass {
3627 let Name = "Exact64";
3628 let PredicateMethod = "isExactImm<64>";
3629 let RenderMethod = "addImmOperands";
3631 def uimm_exact64 : Operand<i32>, ImmLeaf<i32, [{return Imm == 64;}]> {
3632 let ParserMatchClass = exact64_asmoperand;
// Post-index (writeback) load of multiple structures. Each variant comes in
// two flavours: _fixed (immediate post-increment $amt, constrained by the
// uimm_exact* operand to the transfer size) and _register (post-increment by
// GPR $Rm). $wb returns the updated base, tied to $Rn.
3635 multiclass NeonI_LDWB_VList<bit q, bits<4> opcode, bits<2> size,
3636 RegisterOperand VecList, Operand ImmTy,
3638 let Constraints = "$Rn = $wb", mayLoad = 1, neverHasSideEffects = 1,
3639 DecoderMethod = "DecodeVLDSTPostInstruction" in {
3640 def _fixed : NeonI_LdStMult_Post<q, 1, opcode, size,
3641 (outs VecList:$Rt, GPR64xsp:$wb),
3642 (ins GPR64xsp:$Rn, ImmTy:$amt),
3643 asmop # "\t$Rt, [$Rn], $amt",
3649 def _register : NeonI_LdStMult_Post<q, 1, opcode, size,
3650 (outs VecList:$Rt, GPR64xsp:$wb),
3651 (ins GPR64xsp:$Rn, GPR64noxzr:$Rm),
3652 asmop # "\t$Rt, [$Rn], $Rm",
// ImmTy is the fixed increment for the 64-bit (q = 0) arrangements, ImmTy2
// the doubled increment for the 128-bit (q = 1) ones.
3658 multiclass LDWB_VList_BHSD<bits<4> opcode, string List, Operand ImmTy,
3659 Operand ImmTy2, string asmop> {
3660 defm _8B : NeonI_LDWB_VList<0, opcode, 0b00,
3661 !cast<RegisterOperand>(List # "8B_operand"),
3664 defm _4H : NeonI_LDWB_VList<0, opcode, 0b01,
3665 !cast<RegisterOperand>(List # "4H_operand"),
3668 defm _2S : NeonI_LDWB_VList<0, opcode, 0b10,
3669 !cast<RegisterOperand>(List # "2S_operand"),
3672 defm _16B : NeonI_LDWB_VList<1, opcode, 0b00,
3673 !cast<RegisterOperand>(List # "16B_operand"),
3676 defm _8H : NeonI_LDWB_VList<1, opcode, 0b01,
3677 !cast<RegisterOperand>(List # "8H_operand"),
3680 defm _4S : NeonI_LDWB_VList<1, opcode, 0b10,
3681 !cast<RegisterOperand>(List # "4S_operand"),
3684 defm _2D : NeonI_LDWB_VList<1, opcode, 0b11,
3685 !cast<RegisterOperand>(List # "2D_operand"),
3689 // Post-index load multiple N-element structures from N registers (N = 1,2,3,4)
3690 defm LD1WB : LDWB_VList_BHSD<0b0111, "VOne", uimm_exact8, uimm_exact16, "ld1">;
3691 defm LD1WB_1D : NeonI_LDWB_VList<0, 0b0111, 0b11, VOne1D_operand, uimm_exact8,
3694 defm LD2WB : LDWB_VList_BHSD<0b1000, "VPair", uimm_exact16, uimm_exact32, "ld2">;
3696 defm LD3WB : LDWB_VList_BHSD<0b0100, "VTriple", uimm_exact24, uimm_exact48,
3699 defm LD4WB : LDWB_VList_BHSD<0b0000, "VQuad", uimm_exact32, uimm_exact64, "ld4">;
3701 // Post-index load multiple 1-element structures from N consecutive registers
3703 defm LD1x2WB : LDWB_VList_BHSD<0b1010, "VPair", uimm_exact16, uimm_exact32,
3705 defm LD1x2WB_1D : NeonI_LDWB_VList<0, 0b1010, 0b11, VPair1D_operand,
3706 uimm_exact16, "ld1">;
3708 defm LD1x3WB : LDWB_VList_BHSD<0b0110, "VTriple", uimm_exact24, uimm_exact48,
3710 defm LD1x3WB_1D : NeonI_LDWB_VList<0, 0b0110, 0b11, VTriple1D_operand,
3711 uimm_exact24, "ld1">;
3713 defm LD1x4WB : LDWB_VList_BHSD<0b0010, "VQuad", uimm_exact32, uimm_exact64,
3715 defm LD1x4WB_1D : NeonI_LDWB_VList<0, 0b0010, 0b11, VQuad1D_operand,
3716 uimm_exact32, "ld1">;
// Post-index (writeback) store of multiple structures: mirror of
// NeonI_LDWB_VList with the vector list moved to the ins and mayStore set.
3718 multiclass NeonI_STWB_VList<bit q, bits<4> opcode, bits<2> size,
3719 RegisterOperand VecList, Operand ImmTy,
3721 let Constraints = "$Rn = $wb", mayStore = 1, neverHasSideEffects = 1,
3722 DecoderMethod = "DecodeVLDSTPostInstruction" in {
3723 def _fixed : NeonI_LdStMult_Post<q, 0, opcode, size,
3724 (outs GPR64xsp:$wb),
3725 (ins GPR64xsp:$Rn, ImmTy:$amt, VecList:$Rt),
3726 asmop # "\t$Rt, [$Rn], $amt",
3732 def _register : NeonI_LdStMult_Post<q, 0, opcode, size,
3733 (outs GPR64xsp:$wb),
3734 (ins GPR64xsp:$Rn, GPR64noxzr:$Rm, VecList:$Rt),
3735 asmop # "\t$Rt, [$Rn], $Rm",
// ImmTy/ImmTy2 are the 64-bit/128-bit fixed writeback amounts, as in
// LDWB_VList_BHSD above.
3741 multiclass STWB_VList_BHSD<bits<4> opcode, string List, Operand ImmTy,
3742 Operand ImmTy2, string asmop> {
3743 defm _8B : NeonI_STWB_VList<0, opcode, 0b00,
3744 !cast<RegisterOperand>(List # "8B_operand"), ImmTy, asmop>;
3746 defm _4H : NeonI_STWB_VList<0, opcode, 0b01,
3747 !cast<RegisterOperand>(List # "4H_operand"),
3750 defm _2S : NeonI_STWB_VList<0, opcode, 0b10,
3751 !cast<RegisterOperand>(List # "2S_operand"),
3754 defm _16B : NeonI_STWB_VList<1, opcode, 0b00,
3755 !cast<RegisterOperand>(List # "16B_operand"),
3758 defm _8H : NeonI_STWB_VList<1, opcode, 0b01,
3759 !cast<RegisterOperand>(List # "8H_operand"),
3762 defm _4S : NeonI_STWB_VList<1, opcode, 0b10,
3763 !cast<RegisterOperand>(List # "4S_operand"),
3766 defm _2D : NeonI_STWB_VList<1, opcode, 0b11,
3767 !cast<RegisterOperand>(List # "2D_operand"),
3771 // Post-index store multiple N-element structures from N registers (N = 1,2,3,4)
3772 defm ST1WB : STWB_VList_BHSD<0b0111, "VOne", uimm_exact8, uimm_exact16, "st1">;
// The .1D arrangement is instantiated directly; STWB_VList_BHSD produces no
// _1D member.
3773 defm ST1WB_1D : NeonI_STWB_VList<0, 0b0111, 0b11, VOne1D_operand, uimm_exact8,
3776 defm ST2WB : STWB_VList_BHSD<0b1000, "VPair", uimm_exact16, uimm_exact32, "st2">;
3778 defm ST3WB : STWB_VList_BHSD<0b0100, "VTriple", uimm_exact24, uimm_exact48,
3781 defm ST4WB : STWB_VList_BHSD<0b0000, "VQuad", uimm_exact32, uimm_exact64, "st4">;
3783 // Post-index store multiple 1-element structures from N consecutive registers
3785 defm ST1x2WB : STWB_VList_BHSD<0b1010, "VPair", uimm_exact16, uimm_exact32,
// As above, the .1D arrangements are instantiated directly from the
// NeonI_STWB_VList multiclass.
3787 defm ST1x2WB_1D : NeonI_STWB_VList<0, 0b1010, 0b11, VPair1D_operand,
3788 uimm_exact16, "st1">;
3790 defm ST1x3WB : STWB_VList_BHSD<0b0110, "VTriple", uimm_exact24, uimm_exact48,
3792 defm ST1x3WB_1D : NeonI_STWB_VList<0, 0b0110, 0b11, VTriple1D_operand,
3793 uimm_exact24, "st1">;
3795 defm ST1x4WB : STWB_VList_BHSD<0b0010, "VQuad", uimm_exact32, uimm_exact64,
3797 defm ST1x4WB_1D : NeonI_STWB_VList<0, 0b0010, 0b11, VQuad1D_operand,
3798 uimm_exact32, "st1">;
3800 // End of post-index vector load/store multiple N-element structure
3801 // (class SIMD lselem-post)
3803 // The following are vector load/store single N-element structure
3804 // instructions (class SIMD lsone).
// Lane-index operands for the single-structure (lane) instructions below.
// neon_uimmN_bare accepts an N-bit unsigned immediate; the "bare" print
// method presumably emits the value without a '#' prefix (as required for
// lane indices such as "v0.b[3]") -- see printUImmBareOperand.
3805 def neon_uimm0_bare : Operand<i64>,
3806 ImmLeaf<i64, [{return Imm == 0;}]> {
3807 let ParserMatchClass = neon_uimm0_asmoperand;
3808 let PrintMethod = "printUImmBareOperand";
3811 def neon_uimm1_bare : Operand<i64>,
3812 ImmLeaf<i64, [{return Imm < 2;}]> {
3813 let ParserMatchClass = neon_uimm1_asmoperand;
3814 let PrintMethod = "printUImmBareOperand";
3817 def neon_uimm2_bare : Operand<i64>,
3818 ImmLeaf<i64, [{return Imm < 4;}]> {
3819 let ParserMatchClass = neon_uimm2_asmoperand;
3820 let PrintMethod = "printUImmBareOperand";
3823 def neon_uimm3_bare : Operand<i64>,
3824 ImmLeaf<i64, [{return Imm < 8;}]> {
3825 let ParserMatchClass = uimm3_asmoperand;
3826 let PrintMethod = "printUImmBareOperand";
3829 def neon_uimm4_bare : Operand<i64>,
3830 ImmLeaf<i64, [{return Imm < 16;}]> {
3831 let ParserMatchClass = uimm4_asmoperand;
3832 let PrintMethod = "printUImmBareOperand";
// Load-and-replicate (ld1r/ld2r/ld3r/ld4r): load one structure and duplicate
// it to all lanes of the destination register list.
3835 class NeonI_LDN_Dup<bit q, bit r, bits<3> opcode, bits<2> size,
3836 RegisterOperand VecList, string asmop>
3837 : NeonI_LdOne_Dup<q, r, opcode, size,
3838 (outs VecList:$Rt), (ins GPR64xsp:$Rn),
3839 asmop # "\t$Rt, [$Rn]",
3843 let neverHasSideEffects = 1;
// Unlike the multiple-structure multiclasses, this one includes a _1D
// member, so no separate .1D instantiation is needed.
3846 multiclass LDN_Dup_BHSD<bit r, bits<3> opcode, string List, string asmop> {
3847 def _8B : NeonI_LDN_Dup<0, r, opcode, 0b00,
3848 !cast<RegisterOperand>(List # "8B_operand"), asmop>;
3850 def _4H : NeonI_LDN_Dup<0, r, opcode, 0b01,
3851 !cast<RegisterOperand>(List # "4H_operand"), asmop>;
3853 def _2S : NeonI_LDN_Dup<0, r, opcode, 0b10,
3854 !cast<RegisterOperand>(List # "2S_operand"), asmop>;
3856 def _1D : NeonI_LDN_Dup<0, r, opcode, 0b11,
3857 !cast<RegisterOperand>(List # "1D_operand"), asmop>;
3859 def _16B : NeonI_LDN_Dup<1, r, opcode, 0b00,
3860 !cast<RegisterOperand>(List # "16B_operand"), asmop>;
3862 def _8H : NeonI_LDN_Dup<1, r, opcode, 0b01,
3863 !cast<RegisterOperand>(List # "8H_operand"), asmop>;
3865 def _4S : NeonI_LDN_Dup<1, r, opcode, 0b10,
3866 !cast<RegisterOperand>(List # "4S_operand"), asmop>;
3868 def _2D : NeonI_LDN_Dup<1, r, opcode, 0b11,
3869 !cast<RegisterOperand>(List # "2D_operand"), asmop>;
3872 // Load single 1-element structure to all lanes of 1 register
3873 defm LD1R : LDN_Dup_BHSD<0b0, 0b110, "VOne", "ld1r">;
3875 // Load single N-element structure to all lanes of N consecutive
3876 // registers (N = 2,3,4)
3877 defm LD2R : LDN_Dup_BHSD<0b1, 0b110, "VPair", "ld2r">;
3878 defm LD3R : LDN_Dup_BHSD<0b0, 0b111, "VTriple", "ld3r">;
3879 defm LD4R : LDN_Dup_BHSD<0b1, 0b111, "VQuad", "ld4r">;
// Select (dup (load addr)) -- a splat of a loaded scalar -- to LD1R.
3882 class LD1R_pattern <ValueType VTy, ValueType DTy, PatFrag LoadOp,
3884 : Pat<(VTy (Neon_vdup (DTy (LoadOp GPR64xsp:$Rn)))),
3885 (VTy (INST GPR64xsp:$Rn))>;
3887 // Match all LD1R instructions
3888 def : LD1R_pattern<v8i8, i32, extloadi8, LD1R_8B>;
3890 def : LD1R_pattern<v16i8, i32, extloadi8, LD1R_16B>;
3892 def : LD1R_pattern<v4i16, i32, extloadi16, LD1R_4H>;
3894 def : LD1R_pattern<v8i16, i32, extloadi16, LD1R_8H>;
3896 def : LD1R_pattern<v2i32, i32, load, LD1R_2S>;
3897 def : LD1R_pattern<v2f32, f32, load, LD1R_2S>;
3899 def : LD1R_pattern<v4i32, i32, load, LD1R_4S>;
3900 def : LD1R_pattern<v4f32, f32, load, LD1R_4S>;
3902 def : LD1R_pattern<v2i64, i64, load, LD1R_2D>;
3903 def : LD1R_pattern<v2f64, f64, load, LD1R_2D>;
// For single-element (v1) vectors a splat is just scalar_to_vector.
3905 class LD1R_pattern_v1 <ValueType VTy, ValueType DTy, PatFrag LoadOp,
3907 : Pat<(VTy (scalar_to_vector (DTy (LoadOp GPR64xsp:$Rn)))),
3908 (VTy (INST GPR64xsp:$Rn))>;
3910 def : LD1R_pattern_v1<v1i64, i64, load, LD1R_1D>;
3911 def : LD1R_pattern_v1<v1f64, f64, load, LD1R_1D>;
// Vector-list operands with a bare element layout (".b"/".h"/".s"/".d",
// no lane count), used by the lane load/store instructions below.
3913 multiclass VectorList_Bare_BHSD<string PREFIX, int Count,
3914 RegisterClass RegList> {
3915 defm B : VectorList_operands<PREFIX, "B", Count, RegList>;
3916 defm H : VectorList_operands<PREFIX, "H", Count, RegList>;
3917 defm S : VectorList_operands<PREFIX, "S", Count, RegList>;
3918 defm D : VectorList_operands<PREFIX, "D", Count, RegList>;
3921 // Special vector list operand of 128-bit vectors with bare layout.
3922 // i.e. only show ".b", ".h", ".s", ".d"
3923 defm VOne : VectorList_Bare_BHSD<"VOne", 1, FPR128>;
3924 defm VPair : VectorList_Bare_BHSD<"VPair", 2, QPair>;
3925 defm VTriple : VectorList_Bare_BHSD<"VTriple", 3, QTriple>;
3926 defm VQuad : VectorList_Bare_BHSD<"VQuad", 4, QQuad>;
// Load a single structure into one lane, leaving the other lanes of $Rt
// untouched -- hence the $src = $Rt tie.
3928 class NeonI_LDN_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
3929 Operand ImmOp, string asmop>
3930 : NeonI_LdStOne_Lane<1, r, op2_1, op0,
3932 (ins GPR64xsp:$Rn, VList:$src, ImmOp:$lane),
3933 asmop # "\t$Rt[$lane], [$Rn]",
3937 let neverHasSideEffects = 1;
3938 let hasExtraDefRegAllocReq = 1;
3939 let Constraints = "$src = $Rt";
// Per-element-size variants; the lane index is split across Inst{30}
// (the Q bit) and Inst{12-10}, with fewer index bits as elements widen.
3942 multiclass LDN_Lane_BHSD<bit r, bit op0, string List, string asmop> {
3943 def _B : NeonI_LDN_Lane<r, 0b00, op0,
3944 !cast<RegisterOperand>(List # "B_operand"),
3945 neon_uimm4_bare, asmop> {
3946 let Inst{12-10} = lane{2-0};
3947 let Inst{30} = lane{3};
3950 def _H : NeonI_LDN_Lane<r, 0b01, op0,
3951 !cast<RegisterOperand>(List # "H_operand"),
3952 neon_uimm3_bare, asmop> {
3953 let Inst{12-10} = {lane{1}, lane{0}, 0b0};
3954 let Inst{30} = lane{2};
3957 def _S : NeonI_LDN_Lane<r, 0b10, op0,
3958 !cast<RegisterOperand>(List # "S_operand"),
3959 neon_uimm2_bare, asmop> {
3960 let Inst{12-10} = {lane{0}, 0b0, 0b0};
3961 let Inst{30} = lane{1};
3964 def _D : NeonI_LDN_Lane<r, 0b10, op0,
3965 !cast<RegisterOperand>(List # "D_operand"),
3966 neon_uimm1_bare, asmop> {
3967 let Inst{12-10} = 0b001;
3968 let Inst{30} = lane{0};
3972 // Load single 1-element structure to one lane of 1 register.
3973 defm LD1LN : LDN_Lane_BHSD<0b0, 0b0, "VOne", "ld1">;
3975 // Load single N-element structure to one lane of N consecutive registers
3977 defm LD2LN : LDN_Lane_BHSD<0b1, 0b0, "VPair", "ld2">;
3978 defm LD3LN : LDN_Lane_BHSD<0b0, 0b1, "VTriple", "ld3">;
3979 defm LD4LN : LDN_Lane_BHSD<0b1, 0b1, "VQuad", "ld4">;
// vector_insert of a loaded scalar -> LD1LN. The 64-bit (VPR64) case widens
// the source to 128 bits with SUBREG_TO_REG before using the q-form lane
// instruction, then extracts the low half again.
3981 multiclass LD1LN_patterns<ValueType VTy, ValueType VTy2, ValueType DTy,
3982 Operand ImmOp, Operand ImmOp2, PatFrag LoadOp,
3984 def : Pat<(VTy (vector_insert (VTy VPR64:$src),
3985 (DTy (LoadOp GPR64xsp:$Rn)), (ImmOp:$lane))),
3986 (VTy (EXTRACT_SUBREG
3988 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
3992 def : Pat<(VTy2 (vector_insert (VTy2 VPR128:$src),
3993 (DTy (LoadOp GPR64xsp:$Rn)), (ImmOp2:$lane))),
3994 (VTy2 (INST GPR64xsp:$Rn, VPR128:$src, ImmOp2:$lane))>;
3997 // Match all LD1LN instructions
3998 defm : LD1LN_patterns<v8i8, v16i8, i32, neon_uimm3_bare, neon_uimm4_bare,
3999 extloadi8, LD1LN_B>;
4001 defm : LD1LN_patterns<v4i16, v8i16, i32, neon_uimm2_bare, neon_uimm3_bare,
4002 extloadi16, LD1LN_H>;
4004 defm : LD1LN_patterns<v2i32, v4i32, i32, neon_uimm1_bare, neon_uimm2_bare,
4006 defm : LD1LN_patterns<v2f32, v4f32, f32, neon_uimm1_bare, neon_uimm2_bare,
4009 defm : LD1LN_patterns<v1i64, v2i64, i64, neon_uimm0_bare, neon_uimm1_bare,
4011 defm : LD1LN_patterns<v1f64, v2f64, f64, neon_uimm0_bare, neon_uimm1_bare,
// Store a single structure from one lane of the register list; mirror of
// NeonI_LDN_Lane with the list as a pure source (no tie needed).
4014 class NeonI_STN_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
4015 Operand ImmOp, string asmop>
4016 : NeonI_LdStOne_Lane<0, r, op2_1, op0,
4017 (outs), (ins GPR64xsp:$Rn, VList:$Rt, ImmOp:$lane),
4018 asmop # "\t$Rt[$lane], [$Rn]",
4022 let neverHasSideEffects = 1;
4023 let hasExtraDefRegAllocReq = 1;
// Same lane-index encoding scheme as LDN_Lane_BHSD: Inst{30} plus
// Inst{12-10}, with fewer bits for wider elements.
4026 multiclass STN_Lane_BHSD<bit r, bit op0, string List, string asmop> {
4027 def _B : NeonI_STN_Lane<r, 0b00, op0,
4028 !cast<RegisterOperand>(List # "B_operand"),
4029 neon_uimm4_bare, asmop> {
4030 let Inst{12-10} = lane{2-0};
4031 let Inst{30} = lane{3};
4034 def _H : NeonI_STN_Lane<r, 0b01, op0,
4035 !cast<RegisterOperand>(List # "H_operand"),
4036 neon_uimm3_bare, asmop> {
4037 let Inst{12-10} = {lane{1}, lane{0}, 0b0};
4038 let Inst{30} = lane{2};
4041 def _S : NeonI_STN_Lane<r, 0b10, op0,
4042 !cast<RegisterOperand>(List # "S_operand"),
4043 neon_uimm2_bare, asmop> {
4044 let Inst{12-10} = {lane{0}, 0b0, 0b0};
4045 let Inst{30} = lane{1};
4048 def _D : NeonI_STN_Lane<r, 0b10, op0,
4049 !cast<RegisterOperand>(List # "D_operand"),
4050 neon_uimm1_bare, asmop>{
4051 let Inst{12-10} = 0b001;
4052 let Inst{30} = lane{0};
4056 // Store single 1-element structure from one lane of 1 register.
4057 defm ST1LN : STN_Lane_BHSD<0b0, 0b0, "VOne", "st1">;
4059 // Store single N-element structure from one lane of N consecutive registers
4061 defm ST2LN : STN_Lane_BHSD<0b1, 0b0, "VPair", "st2">;
4062 defm ST3LN : STN_Lane_BHSD<0b0, 0b1, "VTriple", "st3">;
4063 defm ST4LN : STN_Lane_BHSD<0b1, 0b1, "VQuad", "st4">;
// store of vector_extract -> ST1LN. The VPR64 case widens the source with
// SUBREG_TO_REG so the q-form lane store can be used.
4065 multiclass ST1LN_patterns<ValueType VTy, ValueType VTy2, ValueType DTy,
4066 Operand ImmOp, Operand ImmOp2, PatFrag StoreOp,
4068 def : Pat<(StoreOp (DTy (vector_extract (VTy VPR64:$Rt), ImmOp:$lane)),
4071 (SUBREG_TO_REG (i64 0), VPR64:$Rt, sub_64),
4074 def : Pat<(StoreOp (DTy (vector_extract (VTy2 VPR128:$Rt), ImmOp2:$lane)),
4076 (INST GPR64xsp:$Rn, VPR128:$Rt, ImmOp2:$lane)>;
4079 // Match all ST1LN instructions
4080 defm : ST1LN_patterns<v8i8, v16i8, i32, neon_uimm3_bare, neon_uimm4_bare,
4081 truncstorei8, ST1LN_B>;
4083 defm : ST1LN_patterns<v4i16, v8i16, i32, neon_uimm2_bare, neon_uimm3_bare,
4084 truncstorei16, ST1LN_H>;
4086 defm : ST1LN_patterns<v2i32, v4i32, i32, neon_uimm1_bare, neon_uimm2_bare,
4088 defm : ST1LN_patterns<v2f32, v4f32, f32, neon_uimm1_bare, neon_uimm2_bare,
4091 defm : ST1LN_patterns<v1i64, v2i64, i64, neon_uimm0_bare, neon_uimm1_bare,
4093 defm : ST1LN_patterns<v1f64, v2f64, f64, neon_uimm0_bare, neon_uimm1_bare,
4096 // End of vector load/store single N-element structure (class SIMD lsone).
4099 // The following are post-index load/store single N-element instructions
4100 // (class SIMD lsone-post)
// Post-index load-and-replicate: _fixed takes an exact immediate increment,
// _register a GPR increment; $wb is the updated base, tied to $Rn.
4102 multiclass NeonI_LDN_WB_Dup<bit q, bit r, bits<3> opcode, bits<2> size,
4103 RegisterOperand VecList, Operand ImmTy,
4105 let mayLoad = 1, neverHasSideEffects = 1, Constraints = "$wb = $Rn",
4106 DecoderMethod = "DecodeVLDSTLanePostInstruction" in {
4107 def _fixed : NeonI_LdOne_Dup_Post<q, r, opcode, size,
4108 (outs VecList:$Rt, GPR64xsp:$wb),
4109 (ins GPR64xsp:$Rn, ImmTy:$amt),
4110 asmop # "\t$Rt, [$Rn], $amt",
4116 def _register : NeonI_LdOne_Dup_Post<q, r, opcode, size,
4117 (outs VecList:$Rt, GPR64xsp:$wb),
4118 (ins GPR64xsp:$Rn, GPR64noxzr:$Rm),
4119 asmop # "\t$Rt, [$Rn], $Rm",
// uimm_b/h/s/d are the per-element-size fixed increments (one element per
// register in the list, so e.g. ld2r advances by 2/4/8/16 bytes).
4125 multiclass LDWB_Dup_BHSD<bit r, bits<3> opcode, string List, string asmop,
4126 Operand uimm_b, Operand uimm_h,
4127 Operand uimm_s, Operand uimm_d> {
4128 defm _8B : NeonI_LDN_WB_Dup<0, r, opcode, 0b00,
4129 !cast<RegisterOperand>(List # "8B_operand"),
4132 defm _4H : NeonI_LDN_WB_Dup<0, r, opcode, 0b01,
4133 !cast<RegisterOperand>(List # "4H_operand"),
4136 defm _2S : NeonI_LDN_WB_Dup<0, r, opcode, 0b10,
4137 !cast<RegisterOperand>(List # "2S_operand"),
4140 defm _1D : NeonI_LDN_WB_Dup<0, r, opcode, 0b11,
4141 !cast<RegisterOperand>(List # "1D_operand"),
4144 defm _16B : NeonI_LDN_WB_Dup<1, r, opcode, 0b00,
4145 !cast<RegisterOperand>(List # "16B_operand"),
4148 defm _8H : NeonI_LDN_WB_Dup<1, r, opcode, 0b01,
4149 !cast<RegisterOperand>(List # "8H_operand"),
4152 defm _4S : NeonI_LDN_WB_Dup<1, r, opcode, 0b10,
4153 !cast<RegisterOperand>(List # "4S_operand"),
4156 defm _2D : NeonI_LDN_WB_Dup<1, r, opcode, 0b11,
4157 !cast<RegisterOperand>(List # "2D_operand"),
4161 // Post-index load single 1-element structure to all lanes of 1 register
4162 defm LD1R_WB : LDWB_Dup_BHSD<0b0, 0b110, "VOne", "ld1r", uimm_exact1,
4163 uimm_exact2, uimm_exact4, uimm_exact8>;
4165 // Post-index load single N-element structure to all lanes of N consecutive
4166 // registers (N = 2,3,4)
4167 defm LD2R_WB : LDWB_Dup_BHSD<0b1, 0b110, "VPair", "ld2r", uimm_exact2,
4168 uimm_exact4, uimm_exact8, uimm_exact16>;
4169 defm LD3R_WB : LDWB_Dup_BHSD<0b0, 0b111, "VTriple", "ld3r", uimm_exact3,
4170 uimm_exact6, uimm_exact12, uimm_exact24>;
4171 defm LD4R_WB : LDWB_Dup_BHSD<0b1, 0b111, "VQuad", "ld4r", uimm_exact4,
4172 uimm_exact8, uimm_exact16, uimm_exact32>;
// Post-index single-lane loads. $Rt is tied to $src because only one lane
// is written; _fixed/_register select immediate vs. register increment.
4174 let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1,
4175 Constraints = "$Rn = $wb, $Rt = $src",
4176 DecoderMethod = "DecodeVLDSTLanePostInstruction" in {
4177 class LDN_WBFx_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
4178 Operand ImmTy, Operand ImmOp, string asmop>
4179 : NeonI_LdStOne_Lane_Post<1, r, op2_1, op0,
4180 (outs VList:$Rt, GPR64xsp:$wb),
4181 (ins GPR64xsp:$Rn, ImmTy:$amt,
4182 VList:$src, ImmOp:$lane),
4183 asmop # "\t$Rt[$lane], [$Rn], $amt",
4189 class LDN_WBReg_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
4190 Operand ImmTy, Operand ImmOp, string asmop>
4191 : NeonI_LdStOne_Lane_Post<1, r, op2_1, op0,
4192 (outs VList:$Rt, GPR64xsp:$wb),
4193 (ins GPR64xsp:$Rn, GPR64noxzr:$Rm,
4194 VList:$src, ImmOp:$lane),
4195 asmop # "\t$Rt[$lane], [$Rn], $Rm",
// Lane index encoded across Inst{30} and Inst{12-10}, exactly as in the
// non-writeback LDN_Lane_BHSD multiclass.
4200 multiclass LD_Lane_WB_BHSD<bit r, bit op0, string List, string asmop,
4201 Operand uimm_b, Operand uimm_h,
4202 Operand uimm_s, Operand uimm_d> {
4203 def _B_fixed : LDN_WBFx_Lane<r, 0b00, op0,
4204 !cast<RegisterOperand>(List # "B_operand"),
4205 uimm_b, neon_uimm4_bare, asmop> {
4206 let Inst{12-10} = lane{2-0};
4207 let Inst{30} = lane{3};
4210 def _B_register : LDN_WBReg_Lane<r, 0b00, op0,
4211 !cast<RegisterOperand>(List # "B_operand"),
4212 uimm_b, neon_uimm4_bare, asmop> {
4213 let Inst{12-10} = lane{2-0};
4214 let Inst{30} = lane{3};
4217 def _H_fixed : LDN_WBFx_Lane<r, 0b01, op0,
4218 !cast<RegisterOperand>(List # "H_operand"),
4219 uimm_h, neon_uimm3_bare, asmop> {
4220 let Inst{12-10} = {lane{1}, lane{0}, 0b0};
4221 let Inst{30} = lane{2};
4224 def _H_register : LDN_WBReg_Lane<r, 0b01, op0,
4225 !cast<RegisterOperand>(List # "H_operand"),
4226 uimm_h, neon_uimm3_bare, asmop> {
4227 let Inst{12-10} = {lane{1}, lane{0}, 0b0};
4228 let Inst{30} = lane{2};
4231 def _S_fixed : LDN_WBFx_Lane<r, 0b10, op0,
4232 !cast<RegisterOperand>(List # "S_operand"),
4233 uimm_s, neon_uimm2_bare, asmop> {
4234 let Inst{12-10} = {lane{0}, 0b0, 0b0};
4235 let Inst{30} = lane{1};
4238 def _S_register : LDN_WBReg_Lane<r, 0b10, op0,
4239 !cast<RegisterOperand>(List # "S_operand"),
4240 uimm_s, neon_uimm2_bare, asmop> {
4241 let Inst{12-10} = {lane{0}, 0b0, 0b0};
4242 let Inst{30} = lane{1};
4245 def _D_fixed : LDN_WBFx_Lane<r, 0b10, op0,
4246 !cast<RegisterOperand>(List # "D_operand"),
4247 uimm_d, neon_uimm1_bare, asmop> {
4248 let Inst{12-10} = 0b001;
4249 let Inst{30} = lane{0};
4252 def _D_register : LDN_WBReg_Lane<r, 0b10, op0,
4253 !cast<RegisterOperand>(List # "D_operand"),
4254 uimm_d, neon_uimm1_bare, asmop> {
4255 let Inst{12-10} = 0b001;
4256 let Inst{30} = lane{0};
4260 // Post-index load single 1-element structure to one lane of 1 register.
4261 defm LD1LN_WB : LD_Lane_WB_BHSD<0b0, 0b0, "VOne", "ld1", uimm_exact1,
4262 uimm_exact2, uimm_exact4, uimm_exact8>;
4264 // Post-index load single N-element structure to one lane of N consecutive
4267 defm LD2LN_WB : LD_Lane_WB_BHSD<0b1, 0b0, "VPair", "ld2", uimm_exact2,
4268 uimm_exact4, uimm_exact8, uimm_exact16>;
4269 defm LD3LN_WB : LD_Lane_WB_BHSD<0b0, 0b1, "VTriple", "ld3", uimm_exact3,
4270 uimm_exact6, uimm_exact12, uimm_exact24>;
4271 defm LD4LN_WB : LD_Lane_WB_BHSD<0b1, 0b1, "VQuad", "ld4", uimm_exact4,
4272 uimm_exact8, uimm_exact16, uimm_exact32>;
// Post-index single-lane stores: mirror of the lane-load classes above,
// with the vector list as a source and only the writeback base as output.
4274 let mayStore = 1, neverHasSideEffects = 1,
4275 hasExtraDefRegAllocReq = 1, Constraints = "$Rn = $wb",
4276 DecoderMethod = "DecodeVLDSTLanePostInstruction" in {
4277 class STN_WBFx_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
4278 Operand ImmTy, Operand ImmOp, string asmop>
4279 : NeonI_LdStOne_Lane_Post<0, r, op2_1, op0,
4280 (outs GPR64xsp:$wb),
4281 (ins GPR64xsp:$Rn, ImmTy:$amt,
4282 VList:$Rt, ImmOp:$lane),
4283 asmop # "\t$Rt[$lane], [$Rn], $amt",
4289 class STN_WBReg_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
4290 Operand ImmTy, Operand ImmOp, string asmop>
4291 : NeonI_LdStOne_Lane_Post<0, r, op2_1, op0,
4292 (outs GPR64xsp:$wb),
4293 (ins GPR64xsp:$Rn, GPR64noxzr:$Rm, VList:$Rt,
4295 asmop # "\t$Rt[$lane], [$Rn], $Rm",
// Same lane-index encoding (Inst{30} + Inst{12-10}) as LD_Lane_WB_BHSD.
4300 multiclass ST_Lane_WB_BHSD<bit r, bit op0, string List, string asmop,
4301 Operand uimm_b, Operand uimm_h,
4302 Operand uimm_s, Operand uimm_d> {
4303 def _B_fixed : STN_WBFx_Lane<r, 0b00, op0,
4304 !cast<RegisterOperand>(List # "B_operand"),
4305 uimm_b, neon_uimm4_bare, asmop> {
4306 let Inst{12-10} = lane{2-0};
4307 let Inst{30} = lane{3};
4310 def _B_register : STN_WBReg_Lane<r, 0b00, op0,
4311 !cast<RegisterOperand>(List # "B_operand"),
4312 uimm_b, neon_uimm4_bare, asmop> {
4313 let Inst{12-10} = lane{2-0};
4314 let Inst{30} = lane{3};
4317 def _H_fixed : STN_WBFx_Lane<r, 0b01, op0,
4318 !cast<RegisterOperand>(List # "H_operand"),
4319 uimm_h, neon_uimm3_bare, asmop> {
4320 let Inst{12-10} = {lane{1}, lane{0}, 0b0};
4321 let Inst{30} = lane{2};
4324 def _H_register : STN_WBReg_Lane<r, 0b01, op0,
4325 !cast<RegisterOperand>(List # "H_operand"),
4326 uimm_h, neon_uimm3_bare, asmop> {
4327 let Inst{12-10} = {lane{1}, lane{0}, 0b0};
4328 let Inst{30} = lane{2};
4331 def _S_fixed : STN_WBFx_Lane<r, 0b10, op0,
4332 !cast<RegisterOperand>(List # "S_operand"),
4333 uimm_s, neon_uimm2_bare, asmop> {
4334 let Inst{12-10} = {lane{0}, 0b0, 0b0};
4335 let Inst{30} = lane{1};
4338 def _S_register : STN_WBReg_Lane<r, 0b10, op0,
4339 !cast<RegisterOperand>(List # "S_operand"),
4340 uimm_s, neon_uimm2_bare, asmop> {
4341 let Inst{12-10} = {lane{0}, 0b0, 0b0};
4342 let Inst{30} = lane{1};
4345 def _D_fixed : STN_WBFx_Lane<r, 0b10, op0,
4346 !cast<RegisterOperand>(List # "D_operand"),
4347 uimm_d, neon_uimm1_bare, asmop> {
4348 let Inst{12-10} = 0b001;
4349 let Inst{30} = lane{0};
4352 def _D_register : STN_WBReg_Lane<r, 0b10, op0,
4353 !cast<RegisterOperand>(List # "D_operand"),
4354 uimm_d, neon_uimm1_bare, asmop> {
4355 let Inst{12-10} = 0b001;
4356 let Inst{30} = lane{0};
4360 // Post-index store single 1-element structure from one lane of 1 register.
4361 defm ST1LN_WB : ST_Lane_WB_BHSD<0b0, 0b0, "VOne", "st1", uimm_exact1,
4362 uimm_exact2, uimm_exact4, uimm_exact8>;
4364 // Post-index store single N-element structure from one lane of N consecutive
4365 // registers (N = 2,3,4)
4366 defm ST2LN_WB : ST_Lane_WB_BHSD<0b1, 0b0, "VPair", "st2", uimm_exact2,
4367 uimm_exact4, uimm_exact8, uimm_exact16>;
4368 defm ST3LN_WB : ST_Lane_WB_BHSD<0b0, 0b1, "VTriple", "st3", uimm_exact3,
4369 uimm_exact6, uimm_exact12, uimm_exact24>;
4370 defm ST4LN_WB : ST_Lane_WB_BHSD<0b1, 0b1, "VQuad", "st4", uimm_exact4,
4371 uimm_exact8, uimm_exact16, uimm_exact32>;
4373 // End of post-index load/store single N-element instructions
4374 // (class SIMD lsone-post)
4376 // Neon Scalar instructions implementation
4377 // Scalar Three Same
// Scalar Three Same: instruction classes.
// NOTE(review): this chunk is a numbered listing with elided lines (the
// embedded line numbers jump, e.g. 4379 -> 4381), so some class bodies and
// closing braces are not visible here; code lines are left byte-identical.

// One three-operand scalar instruction where $Rd, $Rn and $Rm all use the
// same FP register class; assembly is "<asmop>\t$Rd, $Rn, $Rm".
4379 class NeonI_Scalar3Same_size<bit u, bits<2> size, bits<5> opcode, string asmop,
4381 : NeonI_Scalar3Same<u, size, opcode,
4382 (outs FPRC:$Rd), (ins FPRC:$Rn, FPRC:$Rm),
4383 !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
// D-register-only variant: size field fixed to 0b11, operands in FPR64.
4387 class NeonI_Scalar3Same_D_size<bit u, bits<5> opcode, string asmop>
4388 : NeonI_Scalar3Same_size<u, 0b11, opcode, asmop, FPR64>;
// H and S element variants; Commutable toggles isCommutable on both defs.
4390 multiclass NeonI_Scalar3Same_HS_sizes<bit u, bits<5> opcode, string asmop,
4391 bit Commutable = 0> {
4392 let isCommutable = Commutable in {
4393 def hhh : NeonI_Scalar3Same_size<u, 0b01, opcode, asmop, FPR16>;
4394 def sss : NeonI_Scalar3Same_size<u, 0b10, opcode, asmop, FPR32>;
// S and D (floating-point) variants; size is {size_high, 0/1} so callers
// select between the two FP size-field encodings via size_high.
4398 multiclass NeonI_Scalar3Same_SD_sizes<bit u, bit size_high, bits<5> opcode,
4399 string asmop, bit Commutable = 0> {
4400 let isCommutable = Commutable in {
4401 def sss : NeonI_Scalar3Same_size<u, {size_high, 0b0}, opcode, asmop, FPR32>;
4402 def ddd : NeonI_Scalar3Same_size<u, {size_high, 0b1}, opcode, asmop, FPR64>;
// All four integer element widths: B (0b00), H (0b01), S (0b10), D (0b11).
4406 multiclass NeonI_Scalar3Same_BHSD_sizes<bit u, bits<5> opcode,
4407 string asmop, bit Commutable = 0> {
4408 let isCommutable = Commutable in {
4409 def bbb : NeonI_Scalar3Same_size<u, 0b00, opcode, asmop, FPR8>;
4410 def hhh : NeonI_Scalar3Same_size<u, 0b01, opcode, asmop, FPR16>;
4411 def sss : NeonI_Scalar3Same_size<u, 0b10, opcode, asmop, FPR32>;
4412 def ddd : NeonI_Scalar3Same_size<u, 0b11, opcode, asmop, FPR64>;
// Scalar Three Same: selection-pattern multiclasses. Each maps an SDNode or
// intrinsic (opnode) on v1iN "scalar vector" types to the corresponding
// instruction for that element width.
// NOTE(review): listing has elided lines here too (e.g. 4420-4421, 4423-4426
// are not visible); code lines are kept byte-identical.

// v1i64 pattern only (D-size instructions).
4416 multiclass Neon_Scalar3Same_D_size_patterns<SDPatternOperator opnode,
4417 Instruction INSTD> {
4418 def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
4419 (INSTD FPR64:$Rn, FPR64:$Rm)>;
// B/H/S patterns on top of the inherited D pattern.
4422 multiclass Neon_Scalar3Same_BHSD_size_patterns<SDPatternOperator opnode,
4427 : Neon_Scalar3Same_D_size_patterns<opnode, INSTD> {
4428 def: Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))),
4429 (INSTB FPR8:$Rn, FPR8:$Rm)>;
4430 def: Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
4431 (INSTH FPR16:$Rn, FPR16:$Rm)>;
4432 def: Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
4433 (INSTS FPR32:$Rn, FPR32:$Rm)>;
// H and S integer patterns only (used by sqdmulh/sqrdmulh below).
4436 multiclass Neon_Scalar3Same_HS_size_patterns<SDPatternOperator opnode,
4438 Instruction INSTS> {
4439 def : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
4440 (INSTH FPR16:$Rn, FPR16:$Rm)>;
4441 def : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
4442 (INSTS FPR32:$Rn, FPR32:$Rm)>;
// S/D patterns with caller-supplied result and operand ValueTypes, so the
// same multiclass serves fp->fp and fp->int (compare) instructions.
4445 multiclass Neon_Scalar3Same_SD_size_patterns<SDPatternOperator opnode,
4446 ValueType SResTy, ValueType STy,
4447 Instruction INSTS, ValueType DResTy,
4448 ValueType DTy, Instruction INSTD> {
4449 def : Pat<(SResTy (opnode (STy FPR32:$Rn), (STy FPR32:$Rm))),
4450 (INSTS FPR32:$Rn, FPR32:$Rm)>;
4451 def : Pat<(DResTy (opnode (DTy FPR64:$Rn), (DTy FPR64:$Rm))),
4452 (INSTD FPR64:$Rn, FPR64:$Rm)>;
// Selects INSTD for a Neon_cmp of two v1f64 values under condition code CC.
4455 class Neon_Scalar3Same_cmp_V1_D_size_patterns<CondCode CC,
4457 : Pat<(v1i64 (Neon_cmp (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm), CC)),
4458 (INSTD FPR64:$Rn, FPR64:$Rm)>;
4460 // Scalar Three Different
// Scalar Three Different: destination register class differs from the
// (shared) source register class — used by widening multiplies.
// NOTE(review): numbered listing with elided lines (e.g. 4467-4469,
// 4485-4489 not visible); code lines kept byte-identical.

// FPRCD destination, FPRCS sources; asm "<asmop>\t$Rd, $Rn, $Rm".
4462 class NeonI_Scalar3Diff_size<bit u, bits<2> size, bits<4> opcode, string asmop,
4463 RegisterClass FPRCD, RegisterClass FPRCS>
4464 : NeonI_Scalar3Diff<u, size, opcode,
4465 (outs FPRCD:$Rd), (ins FPRCS:$Rn, FPRCS:$Rm),
4466 !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
// Widening H->S and S->D variants.
4470 multiclass NeonI_Scalar3Diff_HS_size<bit u, bits<4> opcode, string asmop> {
4471 def shh : NeonI_Scalar3Diff_size<u, 0b01, opcode, asmop, FPR32, FPR16>;
4472 def dss : NeonI_Scalar3Diff_size<u, 0b10, opcode, asmop, FPR64, FPR32>;
// Multiply-accumulate-long variants: the accumulator $Src is tied to $Rd
// via the Constraints below, so the instruction reads and writes Rd.
4475 multiclass NeonI_Scalar3Diff_ml_HS_size<bit u, bits<4> opcode, string asmop> {
4476 let Constraints = "$Src = $Rd" in {
4477 def shh : NeonI_Scalar3Diff<u, 0b01, opcode,
4478 (outs FPR32:$Rd), (ins FPR32:$Src, FPR16:$Rn, FPR16:$Rm),
4479 !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
4482 def dss : NeonI_Scalar3Diff<u, 0b10, opcode,
4483 (outs FPR64:$Rd), (ins FPR64:$Src, FPR32:$Rn, FPR32:$Rm),
4484 !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
// Patterns: widening op, v1i16 x v1i16 -> v1i32 and v1i32 x v1i32 -> v1i64.
4490 multiclass Neon_Scalar3Diff_HS_size_patterns<SDPatternOperator opnode,
4492 Instruction INSTS> {
4493 def : Pat<(v1i32 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
4494 (INSTH FPR16:$Rn, FPR16:$Rm)>;
4495 def : Pat<(v1i64 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
4496 (INSTS FPR32:$Rn, FPR32:$Rm)>;
// Patterns for the accumulate form: first operand is the accumulator.
4499 multiclass Neon_Scalar3Diff_ml_HS_size_patterns<SDPatternOperator opnode,
4501 Instruction INSTS> {
4502 def : Pat<(v1i32 (opnode (v1i32 FPR32:$Src), (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
4503 (INSTH FPR32:$Src, FPR16:$Rn, FPR16:$Rm)>;
4504 def : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
4505 (INSTS FPR64:$Src, FPR32:$Rn, FPR32:$Rm)>;
4508 // Scalar Two Registers Miscellaneous
// Scalar Two-Register Miscellaneous: unary scalar operations, with possibly
// different destination (FPRCD) and source (FPRCS) register classes.
// NOTE(review): numbered listing with elided lines (e.g. 4515-4517, 4519,
// 4521, 4523-4525 not visible); code lines kept byte-identical.

// Unary instruction: $Rd in FPRCD, $Rn in FPRCS; asm "<asmop>\t$Rd, $Rn".
4510 class NeonI_Scalar2SameMisc_size<bit u, bits<2> size, bits<5> opcode, string asmop,
4511 RegisterClass FPRCD, RegisterClass FPRCS>
4512 : NeonI_Scalar2SameMisc<u, size, opcode,
4513 (outs FPRCD:$Rd), (ins FPRCS:$Rn),
4514 !strconcat(asmop, "\t$Rd, $Rn"),
// S and D floating-point variants; size is {size_high, 0/1}.
4518 multiclass NeonI_Scalar2SameMisc_SD_size<bit u, bit size_high, bits<5> opcode,
4520 def ss : NeonI_Scalar2SameMisc_size<u, {size_high, 0b0}, opcode, asmop, FPR32,
4522 def dd : NeonI_Scalar2SameMisc_size<u, {size_high, 0b1}, opcode, asmop, FPR64,
// D-register-only variant (size = 0b11).
4526 multiclass NeonI_Scalar2SameMisc_D_size<bit u, bits<5> opcode, string asmop> {
4527 def dd : NeonI_Scalar2SameMisc_size<u, 0b11, opcode, asmop, FPR64, FPR64>;
// All four widths: inherits the D def and adds B/H/S.
4530 multiclass NeonI_Scalar2SameMisc_BHSD_size<bit u, bits<5> opcode, string asmop>
4531 : NeonI_Scalar2SameMisc_D_size<u, opcode, asmop> {
4532 def bb : NeonI_Scalar2SameMisc_size<u, 0b00, opcode, asmop, FPR8, FPR8>;
4533 def hh : NeonI_Scalar2SameMisc_size<u, 0b01, opcode, asmop, FPR16, FPR16>;
4534 def ss : NeonI_Scalar2SameMisc_size<u, 0b10, opcode, asmop, FPR32, FPR32>;
// FCVTXN form: narrows FPR64 source into an FPR32 destination.
4537 class NeonI_Scalar2SameMisc_fcvtxn_D_size<bit u, bits<5> opcode, string asmop>
4538 : NeonI_Scalar2SameMisc_size<u, 0b01, opcode, asmop, FPR32, FPR64>;
// Narrowing variants: destination is one width below the source
// (H->B, S->H, D->S).
4540 multiclass NeonI_Scalar2SameMisc_narrow_HSD_size<bit u, bits<5> opcode,
4542 def bh : NeonI_Scalar2SameMisc_size<u, 0b00, opcode, asmop, FPR8, FPR16>;
4543 def hs : NeonI_Scalar2SameMisc_size<u, 0b01, opcode, asmop, FPR16, FPR32>;
4544 def sd : NeonI_Scalar2SameMisc_size<u, 0b10, opcode, asmop, FPR32, FPR64>;
// Accumulating unary form: reads $Src and $Rn, writes $Rd; $Src is tied to
// $Rd by the Constraints in the BHSD multiclass below.
4547 class NeonI_Scalar2SameMisc_accum_size<bit u, bits<2> size, bits<5> opcode,
4548 string asmop, RegisterClass FPRC>
4549 : NeonI_Scalar2SameMisc<u, size, opcode,
4550 (outs FPRC:$Rd), (ins FPRC:$Src, FPRC:$Rn),
4551 !strconcat(asmop, "\t$Rd, $Rn"),
4555 multiclass NeonI_Scalar2SameMisc_accum_BHSD_size<bit u, bits<5> opcode,
4558 let Constraints = "$Src = $Rd" in {
4559 def bb : NeonI_Scalar2SameMisc_accum_size<u, 0b00, opcode, asmop, FPR8>;
4560 def hh : NeonI_Scalar2SameMisc_accum_size<u, 0b01, opcode, asmop, FPR16>;
4561 def ss : NeonI_Scalar2SameMisc_accum_size<u, 0b10, opcode, asmop, FPR32>;
4562 def dd : NeonI_Scalar2SameMisc_accum_size<u, 0b11, opcode, asmop, FPR64>;
4566 class Neon_Scalar2SameMisc_fcvtxn_D_size_patterns<SDPatternOperator opnode,
4568 : Pat<(f32 (opnode (f64 FPR64:$Rn))),
4571 multiclass Neon_Scalar2SameMisc_fcvt_SD_size_patterns<SDPatternOperator opnode,
4573 Instruction INSTD> {
4574 def : Pat<(v1i32 (opnode (f32 FPR32:$Rn))),
4576 def : Pat<(v1i64 (opnode (f64 FPR64:$Rn))),
4580 class Neon_Scalar2SameMisc_vcvt_D_size_patterns<SDPatternOperator opnode,
4582 : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn))),
4585 multiclass Neon_Scalar2SameMisc_cvt_SD_size_patterns<SDPatternOperator opnode,
4587 Instruction INSTD> {
4588 def : Pat<(f32 (opnode (v1i32 FPR32:$Rn))),
4590 def : Pat<(f64 (opnode (v1i64 FPR64:$Rn))),
4594 multiclass Neon_Scalar2SameMisc_SD_size_patterns<SDPatternOperator opnode,
4596 Instruction INSTD> {
4597 def : Pat<(f32 (opnode (f32 FPR32:$Rn))),
4599 def : Pat<(f64 (opnode (f64 FPR64:$Rn))),
4603 class Neon_Scalar2SameMisc_V1_D_size_patterns<SDPatternOperator opnode,
4605 : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))),
4608 class NeonI_Scalar2SameMisc_cmpz_D_size<bit u, bits<5> opcode, string asmop>
4609 : NeonI_Scalar2SameMisc<u, 0b11, opcode,
4610 (outs FPR64:$Rd), (ins FPR64:$Rn, neon_uimm0:$Imm),
4611 !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
4615 multiclass NeonI_Scalar2SameMisc_cmpz_SD_size<bit u, bits<5> opcode,
4617 def ssi : NeonI_Scalar2SameMisc<u, 0b10, opcode,
4618 (outs FPR32:$Rd), (ins FPR32:$Rn, fpzz32:$FPImm),
4619 !strconcat(asmop, "\t$Rd, $Rn, $FPImm"),
4622 def ddi : NeonI_Scalar2SameMisc<u, 0b11, opcode,
4623 (outs FPR64:$Rd), (ins FPR64:$Rn, fpzz32:$FPImm),
4624 !strconcat(asmop, "\t$Rd, $Rn, $FPImm"),
4629 class Neon_Scalar2SameMisc_cmpz_D_size_patterns<SDPatternOperator opnode,
4631 : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn),
4632 (v1i64 (bitconvert (v8i8 Neon_AllZero))))),
4633 (INSTD FPR64:$Rn, 0)>;
4635 class Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<CondCode CC,
4637 : Pat<(v1i64 (Neon_cmpz (v1i64 FPR64:$Rn),
4638 (i32 neon_uimm0:$Imm), CC)),
4639 (INSTD FPR64:$Rn, neon_uimm0:$Imm)>;
4641 multiclass Neon_Scalar2SameMisc_cmpz_SD_size_patterns<SDPatternOperator opnode,
4644 Instruction INSTD> {
4645 def : Pat<(v1i32 (opnode (f32 FPR32:$Rn), (f32 fpzz32:$FPImm))),
4646 (INSTS FPR32:$Rn, fpzz32:$FPImm)>;
4647 def : Pat<(v1i64 (opnode (f64 FPR64:$Rn), (f32 fpzz32:$FPImm))),
4648 (INSTD FPR64:$Rn, fpzz32:$FPImm)>;
4649 def : Pat<(v1i64 (Neon_cmpz (v1f64 FPR64:$Rn), (f32 fpzz32:$FPImm), CC)),
4650 (INSTD FPR64:$Rn, fpzz32:$FPImm)>;
4653 multiclass Neon_Scalar2SameMisc_D_size_patterns<SDPatternOperator opnode,
4654 Instruction INSTD> {
4655 def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn))),
4659 multiclass Neon_Scalar2SameMisc_BHSD_size_patterns<SDPatternOperator opnode,
4664 : Neon_Scalar2SameMisc_D_size_patterns<opnode, INSTD> {
4665 def : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn))),
4667 def : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn))),
4669 def : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn))),
4673 multiclass Neon_Scalar2SameMisc_narrow_HSD_size_patterns<
4674 SDPatternOperator opnode,
4677 Instruction INSTD> {
4678 def : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn))),
4680 def : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn))),
4682 def : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn))),
4687 multiclass Neon_Scalar2SameMisc_accum_BHSD_size_patterns<
4688 SDPatternOperator opnode,
4692 Instruction INSTD> {
4693 def : Pat<(v1i8 (opnode (v1i8 FPR8:$Src), (v1i8 FPR8:$Rn))),
4694 (INSTB FPR8:$Src, FPR8:$Rn)>;
4695 def : Pat<(v1i16 (opnode (v1i16 FPR16:$Src), (v1i16 FPR16:$Rn))),
4696 (INSTH FPR16:$Src, FPR16:$Rn)>;
4697 def : Pat<(v1i32 (opnode (v1i32 FPR32:$Src), (v1i32 FPR32:$Rn))),
4698 (INSTS FPR32:$Src, FPR32:$Rn)>;
4699 def : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn))),
4700 (INSTD FPR64:$Src, FPR64:$Rn)>;
4703 // Scalar Shift By Immediate
4705 class NeonI_ScalarShiftImm_size<bit u, bits<5> opcode, string asmop,
4706 RegisterClass FPRC, Operand ImmTy>
4707 : NeonI_ScalarShiftImm<u, opcode,
4708 (outs FPRC:$Rd), (ins FPRC:$Rn, ImmTy:$Imm),
4709 !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
4712 multiclass NeonI_ScalarShiftRightImm_D_size<bit u, bits<5> opcode,
4714 def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shr_imm64> {
4716 let Inst{22} = 0b1; // immh:immb = 1xxxxxx
4717 let Inst{21-16} = Imm;
4721 multiclass NeonI_ScalarShiftRightImm_BHSD_size<bit u, bits<5> opcode,
4723 : NeonI_ScalarShiftRightImm_D_size<u, opcode, asmop> {
4724 def bbi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR8, shr_imm8> {
4726 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
4727 let Inst{18-16} = Imm;
4729 def hhi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR16, shr_imm16> {
4731 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
4732 let Inst{19-16} = Imm;
4734 def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shr_imm32> {
4736 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
4737 let Inst{20-16} = Imm;
4741 multiclass NeonI_ScalarShiftLeftImm_D_size<bit u, bits<5> opcode,
4743 def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shl_imm64> {
4745 let Inst{22} = 0b1; // immh:immb = 1xxxxxx
4746 let Inst{21-16} = Imm;
4750 multiclass NeonI_ScalarShiftLeftImm_BHSD_size<bit u, bits<5> opcode,
4752 : NeonI_ScalarShiftLeftImm_D_size<u, opcode, asmop> {
4753 def bbi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR8, shl_imm8> {
4755 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
4756 let Inst{18-16} = Imm;
4758 def hhi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR16, shl_imm16> {
4760 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
4761 let Inst{19-16} = Imm;
4763 def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shl_imm32> {
4765 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
4766 let Inst{20-16} = Imm;
4770 class NeonI_ScalarShiftRightImm_accum_D_size<bit u, bits<5> opcode, string asmop>
4771 : NeonI_ScalarShiftImm<u, opcode,
4773 (ins FPR64:$Src, FPR64:$Rn, shr_imm64:$Imm),
4774 !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
4777 let Inst{22} = 0b1; // immh:immb = 1xxxxxx
4778 let Inst{21-16} = Imm;
4779 let Constraints = "$Src = $Rd";
4782 class NeonI_ScalarShiftLeftImm_accum_D_size<bit u, bits<5> opcode, string asmop>
4783 : NeonI_ScalarShiftImm<u, opcode,
4785 (ins FPR64:$Src, FPR64:$Rn, shl_imm64:$Imm),
4786 !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
4789 let Inst{22} = 0b1; // immh:immb = 1xxxxxx
4790 let Inst{21-16} = Imm;
4791 let Constraints = "$Src = $Rd";
4794 class NeonI_ScalarShiftImm_narrow_size<bit u, bits<5> opcode, string asmop,
4795 RegisterClass FPRCD, RegisterClass FPRCS,
4797 : NeonI_ScalarShiftImm<u, opcode,
4798 (outs FPRCD:$Rd), (ins FPRCS:$Rn, ImmTy:$Imm),
4799 !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
4802 multiclass NeonI_ScalarShiftImm_narrow_HSD_size<bit u, bits<5> opcode,
4804 def bhi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR8, FPR16,
4807 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
4808 let Inst{18-16} = Imm;
4810 def hsi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR16, FPR32,
4813 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
4814 let Inst{19-16} = Imm;
4816 def sdi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR32, FPR64,
4819 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
4820 let Inst{20-16} = Imm;
4824 multiclass NeonI_ScalarShiftImm_cvt_SD_size<bit u, bits<5> opcode, string asmop> {
4825 def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shr_imm32> {
4827 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
4828 let Inst{20-16} = Imm;
4830 def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shr_imm64> {
4832 let Inst{22} = 0b1; // immh:immb = 1xxxxxx
4833 let Inst{21-16} = Imm;
4837 multiclass Neon_ScalarShiftRImm_D_size_patterns<SDPatternOperator opnode,
4838 Instruction INSTD> {
4839 def ddi : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
4840 (INSTD FPR64:$Rn, imm:$Imm)>;
4843 multiclass Neon_ScalarShiftLImm_D_size_patterns<SDPatternOperator opnode,
4844 Instruction INSTD> {
4845 def ddi : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (i32 shl_imm64:$Imm))),
4846 (INSTD FPR64:$Rn, imm:$Imm)>;
4849 class Neon_ScalarShiftLImm_V1_D_size_patterns<SDPatternOperator opnode,
4851 : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn),
4852 (v1i64 (Neon_vdup (i32 shl_imm64:$Imm))))),
4853 (INSTD FPR64:$Rn, imm:$Imm)>;
4855 class Neon_ScalarShiftRImm_V1_D_size_patterns<SDPatternOperator opnode,
4857 : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn),
4858 (v1i64 (Neon_vdup (i32 shr_imm64:$Imm))))),
4859 (INSTD FPR64:$Rn, imm:$Imm)>;
4861 multiclass Neon_ScalarShiftLImm_BHSD_size_patterns<SDPatternOperator opnode,
4866 : Neon_ScalarShiftLImm_D_size_patterns<opnode, INSTD> {
4867 def bbi : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (i32 shl_imm8:$Imm))),
4868 (INSTB FPR8:$Rn, imm:$Imm)>;
4869 def hhi : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (i32 shl_imm16:$Imm))),
4870 (INSTH FPR16:$Rn, imm:$Imm)>;
4871 def ssi : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (i32 shl_imm32:$Imm))),
4872 (INSTS FPR32:$Rn, imm:$Imm)>;
4875 class Neon_ScalarShiftLImm_accum_D_size_patterns<SDPatternOperator opnode,
4877 : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn),
4878 (i32 shl_imm64:$Imm))),
4879 (INSTD FPR64:$Src, FPR64:$Rn, imm:$Imm)>;
4881 class Neon_ScalarShiftRImm_accum_D_size_patterns<SDPatternOperator opnode,
4883 : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn),
4884 (i32 shr_imm64:$Imm))),
4885 (INSTD FPR64:$Src, FPR64:$Rn, imm:$Imm)>;
4887 multiclass Neon_ScalarShiftImm_narrow_HSD_size_patterns<
4888 SDPatternOperator opnode,
4891 Instruction INSTD> {
4892 def bhi : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn), (i32 shr_imm16:$Imm))),
4893 (INSTH FPR16:$Rn, imm:$Imm)>;
4894 def hsi : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn), (i32 shr_imm32:$Imm))),
4895 (INSTS FPR32:$Rn, imm:$Imm)>;
4896 def sdi : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
4897 (INSTD FPR64:$Rn, imm:$Imm)>;
4900 multiclass Neon_ScalarShiftImm_scvtf_SD_size_patterns<SDPatternOperator opnode,
4902 Instruction INSTD> {
4903 def ssi : Pat<(f32 (opnode (v1i32 FPR32:$Rn), (i32 shr_imm32:$Imm))),
4904 (INSTS FPR32:$Rn, imm:$Imm)>;
4905 def ddi : Pat<(f64 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
4906 (INSTD FPR64:$Rn, imm:$Imm)>;
4909 multiclass Neon_ScalarShiftImm_fcvts_SD_size_patterns<SDPatternOperator opnode,
4911 Instruction INSTD> {
4912 def ssi : Pat<(v1i32 (opnode (f32 FPR32:$Rn), (i32 shr_imm32:$Imm))),
4913 (INSTS FPR32:$Rn, imm:$Imm)>;
4914 def ddi : Pat<(v1i64 (opnode (f64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
4915 (INSTD FPR64:$Rn, imm:$Imm)>;
4918 // Scalar Signed Shift Right (Immediate)
4919 defm SSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00000, "sshr">;
4920 defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vshrds_n, SSHRddi>;
4921 // Pattern to match llvm.arm.* intrinsic.
4922 def : Neon_ScalarShiftRImm_V1_D_size_patterns<sra, SSHRddi>;
4924 // Scalar Unsigned Shift Right (Immediate)
4925 defm USHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00000, "ushr">;
4926 defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vshrdu_n, USHRddi>;
4927 // Pattern to match llvm.arm.* intrinsic.
4928 def : Neon_ScalarShiftRImm_V1_D_size_patterns<srl, USHRddi>;
4930 // Scalar Signed Rounding Shift Right (Immediate)
4931 defm SRSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00100, "srshr">;
4932 defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vsrshr, SRSHRddi>;
4934 // Scalar Unsigned Rounding Shift Right (Immediate)
4935 defm URSHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00100, "urshr">;
4936 defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vurshr, URSHRddi>;
4938 // Scalar Signed Shift Right and Accumulate (Immediate)
4939 def SSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b0, 0b00010, "ssra">;
4940 def : Neon_ScalarShiftRImm_accum_D_size_patterns
4941 <int_aarch64_neon_vsrads_n, SSRA>;
4943 // Scalar Unsigned Shift Right and Accumulate (Immediate)
4944 def USRA : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b00010, "usra">;
4945 def : Neon_ScalarShiftRImm_accum_D_size_patterns
4946 <int_aarch64_neon_vsradu_n, USRA>;
4948 // Scalar Signed Rounding Shift Right and Accumulate (Immediate)
4949 def SRSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b0, 0b00110, "srsra">;
4950 def : Neon_ScalarShiftRImm_accum_D_size_patterns
4951 <int_aarch64_neon_vrsrads_n, SRSRA>;
4953 // Scalar Unsigned Rounding Shift Right and Accumulate (Immediate)
4954 def URSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b00110, "ursra">;
4955 def : Neon_ScalarShiftRImm_accum_D_size_patterns
4956 <int_aarch64_neon_vrsradu_n, URSRA>;
4958 // Scalar Shift Left (Immediate)
4959 defm SHL : NeonI_ScalarShiftLeftImm_D_size<0b0, 0b01010, "shl">;
4960 defm : Neon_ScalarShiftLImm_D_size_patterns<int_aarch64_neon_vshld_n, SHLddi>;
4961 // Pattern to match llvm.arm.* intrinsic.
4962 def : Neon_ScalarShiftLImm_V1_D_size_patterns<shl, SHLddi>;
4964 // Signed Saturating Shift Left (Immediate)
4965 defm SQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b0, 0b01110, "sqshl">;
4966 defm : Neon_ScalarShiftLImm_BHSD_size_patterns<int_aarch64_neon_vqshls_n,
4968 SQSHLssi, SQSHLddi>;
4969 // Pattern to match llvm.arm.* intrinsic.
4970 defm : Neon_ScalarShiftLImm_D_size_patterns<Neon_sqrshlImm, SQSHLddi>;
4972 // Unsigned Saturating Shift Left (Immediate)
4973 defm UQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01110, "uqshl">;
4974 defm : Neon_ScalarShiftLImm_BHSD_size_patterns<int_aarch64_neon_vqshlu_n,
4976 UQSHLssi, UQSHLddi>;
4977 // Pattern to match llvm.arm.* intrinsic.
4978 defm : Neon_ScalarShiftLImm_D_size_patterns<Neon_uqrshlImm, UQSHLddi>;
4980 // Signed Saturating Shift Left Unsigned (Immediate)
4981 defm SQSHLU : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01100, "sqshlu">;
4982 defm : Neon_ScalarShiftLImm_BHSD_size_patterns<int_aarch64_neon_vsqshlu,
4983 SQSHLUbbi, SQSHLUhhi,
4984 SQSHLUssi, SQSHLUddi>;
4986 // Shift Right And Insert (Immediate)
4987 def SRI : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b01000, "sri">;
4988 def : Neon_ScalarShiftRImm_accum_D_size_patterns
4989 <int_aarch64_neon_vsri, SRI>;
4991 // Shift Left And Insert (Immediate)
4992 def SLI : NeonI_ScalarShiftLeftImm_accum_D_size<0b1, 0b01010, "sli">;
4993 def : Neon_ScalarShiftLImm_accum_D_size_patterns
4994 <int_aarch64_neon_vsli, SLI>;
4996 // Signed Saturating Shift Right Narrow (Immediate)
4997 defm SQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10010, "sqshrn">;
4998 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqshrn,
4999 SQSHRNbhi, SQSHRNhsi,
5002 // Unsigned Saturating Shift Right Narrow (Immediate)
5003 defm UQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10010, "uqshrn">;
5004 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vuqshrn,
5005 UQSHRNbhi, UQSHRNhsi,
5008 // Signed Saturating Rounded Shift Right Narrow (Immediate)
5009 defm SQRSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10011, "sqrshrn">;
5010 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqrshrn,
5011 SQRSHRNbhi, SQRSHRNhsi,
5014 // Unsigned Saturating Rounded Shift Right Narrow (Immediate)
5015 defm UQRSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10011, "uqrshrn">;
5016 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vuqrshrn,
5017 UQRSHRNbhi, UQRSHRNhsi,
5020 // Signed Saturating Shift Right Unsigned Narrow (Immediate)
5021 defm SQSHRUN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10000, "sqshrun">;
5022 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqshrun,
5023 SQSHRUNbhi, SQSHRUNhsi,
5026 // Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate)
5027 defm SQRSHRUN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10001, "sqrshrun">;
5028 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqrshrun,
5029 SQRSHRUNbhi, SQRSHRUNhsi,
5032 // Scalar Signed Fixed-point Convert To Floating-Point (Immediate)
5033 defm SCVTF_N : NeonI_ScalarShiftImm_cvt_SD_size<0b0, 0b11100, "scvtf">;
5034 defm : Neon_ScalarShiftImm_scvtf_SD_size_patterns<int_aarch64_neon_vcvtfxs2fp_n,
5035 SCVTF_Nssi, SCVTF_Nddi>;
5037 // Scalar Unsigned Fixed-point Convert To Floating-Point (Immediate)
5038 defm UCVTF_N : NeonI_ScalarShiftImm_cvt_SD_size<0b1, 0b11100, "ucvtf">;
5039 defm : Neon_ScalarShiftImm_scvtf_SD_size_patterns<int_aarch64_neon_vcvtfxu2fp_n,
5040 UCVTF_Nssi, UCVTF_Nddi>;
5042 // Scalar Floating-point Convert To Signed Fixed-point (Immediate)
5043 defm FCVTZS_N : NeonI_ScalarShiftImm_cvt_SD_size<0b0, 0b11111, "fcvtzs">;
5044 defm : Neon_ScalarShiftImm_fcvts_SD_size_patterns<int_aarch64_neon_vcvtfp2fxs_n,
5045 FCVTZS_Nssi, FCVTZS_Nddi>;
5047 // Scalar Floating-point Convert To Unsigned Fixed-point (Immediate)
5048 defm FCVTZU_N : NeonI_ScalarShiftImm_cvt_SD_size<0b1, 0b11111, "fcvtzu">;
5049 defm : Neon_ScalarShiftImm_fcvts_SD_size_patterns<int_aarch64_neon_vcvtfp2fxu_n,
5050 FCVTZU_Nssi, FCVTZU_Nddi>;
5052 // Patterns For Convert Instructions Between v1f64 and v1i64
5053 class Neon_ScalarShiftImm_cvtf_v1f64_pattern<SDPatternOperator opnode,
5055 : Pat<(v1f64 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
5056 (INST FPR64:$Rn, imm:$Imm)>;
5058 class Neon_ScalarShiftImm_fcvt_v1f64_pattern<SDPatternOperator opnode,
5060 : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
5061 (INST FPR64:$Rn, imm:$Imm)>;
5063 def : Neon_ScalarShiftImm_cvtf_v1f64_pattern<int_arm_neon_vcvtfxs2fp,
5066 def : Neon_ScalarShiftImm_cvtf_v1f64_pattern<int_arm_neon_vcvtfxu2fp,
5069 def : Neon_ScalarShiftImm_fcvt_v1f64_pattern<int_arm_neon_vcvtfp2fxs,
5072 def : Neon_ScalarShiftImm_fcvt_v1f64_pattern<int_arm_neon_vcvtfp2fxu,
5075 // Scalar Integer Add
5076 let isCommutable = 1 in {
5077 def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">;
5080 // Scalar Integer Sub
5081 def SUBddd : NeonI_Scalar3Same_D_size<0b1, 0b10000, "sub">;
5083 // Pattern for Scalar Integer Add and Sub with D register only
5084 defm : Neon_Scalar3Same_D_size_patterns<add, ADDddd>;
5085 defm : Neon_Scalar3Same_D_size_patterns<sub, SUBddd>;
5087 // Patterns to match llvm.aarch64.* intrinsic for Scalar Add, Sub
5088 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vaddds, ADDddd>;
5089 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vadddu, ADDddd>;
5090 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vsubds, SUBddd>;
5091 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vsubdu, SUBddd>;
5093 // Scalar Integer Saturating Add (Signed, Unsigned)
5094 defm SQADD : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00001, "sqadd", 1>;
5095 defm UQADD : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00001, "uqadd", 1>;
5097 // Scalar Integer Saturating Sub (Signed, Unsigned)
5098 defm SQSUB : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00101, "sqsub", 0>;
5099 defm UQSUB : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00101, "uqsub", 0>;
5102 // Patterns to match llvm.aarch64.* intrinsic for
5103 // Scalar Integer Saturating Add, Sub (Signed, Unsigned)
5104 defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqadds, SQADDbbb,
5105 SQADDhhh, SQADDsss, SQADDddd>;
5106 defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqaddu, UQADDbbb,
5107 UQADDhhh, UQADDsss, UQADDddd>;
5108 defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqsubs, SQSUBbbb,
5109 SQSUBhhh, SQSUBsss, SQSUBddd>;
5110 defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqsubu, UQSUBbbb,
5111 UQSUBhhh, UQSUBsss, UQSUBddd>;
5113 // Scalar Integer Saturating Doubling Multiply Half High
5114 defm SQDMULH : NeonI_Scalar3Same_HS_sizes<0b0, 0b10110, "sqdmulh", 1>;
5116 // Scalar Integer Saturating Rounding Doubling Multiply Half High
5117 defm SQRDMULH : NeonI_Scalar3Same_HS_sizes<0b1, 0b10110, "sqrdmulh", 1>;
5119 // Patterns to match llvm.arm.* intrinsic for
5120 // Scalar Integer Saturating Doubling Multiply Half High and
5121 // Scalar Integer Saturating Rounding Doubling Multiply Half High
5122 defm : Neon_Scalar3Same_HS_size_patterns<int_arm_neon_vqdmulh, SQDMULHhhh,
5124 defm : Neon_Scalar3Same_HS_size_patterns<int_arm_neon_vqrdmulh, SQRDMULHhhh,
5127 // Scalar Floating-point Multiply Extended
5128 defm FMULX : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11011, "fmulx", 1>;
5130 // Scalar Floating-point Reciprocal Step
5131 defm FRECPS : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11111, "frecps", 0>;
5132 defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_vrecps, f32, f32,
5133 FRECPSsss, f64, f64, FRECPSddd>;
5134 def : Pat<(v1f64 (int_arm_neon_vrecps (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
5135 (FRECPSddd FPR64:$Rn, FPR64:$Rm)>;
5137 // Scalar Floating-point Reciprocal Square Root Step
5138 defm FRSQRTS : NeonI_Scalar3Same_SD_sizes<0b0, 0b1, 0b11111, "frsqrts", 0>;
5139 defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_vrsqrts, f32, f32,
5140 FRSQRTSsss, f64, f64, FRSQRTSddd>;
5141 def : Pat<(v1f64 (int_arm_neon_vrsqrts (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
5142 (FRSQRTSddd FPR64:$Rn, FPR64:$Rm)>;
5143 def : Pat<(v1f64 (fsqrt (v1f64 FPR64:$Rn))), (FSQRTdd FPR64:$Rn)>;
5145 // Patterns to match llvm.aarch64.* intrinsic for
5146 // Scalar Floating-point Multiply Extended,
5147 multiclass Neon_Scalar3Same_MULX_SD_size_patterns<SDPatternOperator opnode,
5149 Instruction INSTD> {
5150 def : Pat<(f32 (opnode (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
5151 (INSTS FPR32:$Rn, FPR32:$Rm)>;
5152 def : Pat<(f64 (opnode (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
5153 (INSTD FPR64:$Rn, FPR64:$Rm)>;
5156 defm : Neon_Scalar3Same_MULX_SD_size_patterns<int_aarch64_neon_vmulx,
5157 FMULXsss, FMULXddd>;
5158 def : Pat<(v1f64 (int_aarch64_neon_vmulx (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
5159 (FMULXddd FPR64:$Rn, FPR64:$Rm)>;
5161 // Scalar Integer Shift Left (Signed, Unsigned)
5162 def SSHLddd : NeonI_Scalar3Same_D_size<0b0, 0b01000, "sshl">;
5163 def USHLddd : NeonI_Scalar3Same_D_size<0b1, 0b01000, "ushl">;
5165 // Patterns to match llvm.arm.* intrinsic for
5166 // Scalar Integer Shift Left (Signed, Unsigned)
5167 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vshifts, SSHLddd>;
5168 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vshiftu, USHLddd>;
5170 // Patterns to match llvm.aarch64.* intrinsic for
5171 // Scalar Integer Shift Left (Signed, Unsigned)
5172 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vshlds, SSHLddd>;
5173 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vshldu, USHLddd>;
5175 // Scalar Integer Saturating Shift Left (Signed, Unsigned)
5176 defm SQSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01001, "sqshl", 0>;
5177 defm UQSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01001, "uqshl", 0>;
5179 // Patterns to match llvm.aarch64.* intrinsic for
5180 // Scalar Integer Saturating Shift Left (Signed, Unsigned)
5181 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqshls, SQSHLbbb,
5182 SQSHLhhh, SQSHLsss, SQSHLddd>;
5183 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqshlu, UQSHLbbb,
5184 UQSHLhhh, UQSHLsss, UQSHLddd>;
5186 // Patterns to match llvm.arm.* intrinsic for
5187 // Scalar Integer Saturating Shift Left (Signed, Unsigned)
5188 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqshifts, SQSHLddd>;
5189 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqshiftu, UQSHLddd>;
5191 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
5192 def SRSHLddd: NeonI_Scalar3Same_D_size<0b0, 0b01010, "srshl">;
5193 def URSHLddd: NeonI_Scalar3Same_D_size<0b1, 0b01010, "urshl">;
5195 // Patterns to match llvm.aarch64.* intrinsic for
5196 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
5197 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vrshlds, SRSHLddd>;
5198 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vrshldu, URSHLddd>;
5200 // Patterns to match llvm.arm.* intrinsic for
5201 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
5202 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vrshifts, SRSHLddd>;
5203 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vrshiftu, URSHLddd>;
5205 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
5206 defm SQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01011, "sqrshl", 0>;
5207 defm UQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01011, "uqrshl", 0>;
5209 // Patterns to match llvm.aarch64.* intrinsic for
5210 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
5211 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqrshls, SQRSHLbbb,
5212 SQRSHLhhh, SQRSHLsss, SQRSHLddd>;
5213 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqrshlu, UQRSHLbbb,
5214 UQRSHLhhh, UQRSHLsss, UQRSHLddd>;
5216 // Patterns to match llvm.arm.* intrinsic for
5217 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
5218 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshifts, SQRSHLddd>;
5219 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshiftu, UQRSHLddd>;
5221 // Signed Saturating Doubling Multiply-Add Long
5222 defm SQDMLAL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1001, "sqdmlal">;
5223 defm : Neon_Scalar3Diff_ml_HS_size_patterns<int_aarch64_neon_vqdmlal,
5224 SQDMLALshh, SQDMLALdss>;
5226 // Signed Saturating Doubling Multiply-Subtract Long
5227 defm SQDMLSL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1011, "sqdmlsl">;
5228 defm : Neon_Scalar3Diff_ml_HS_size_patterns<int_aarch64_neon_vqdmlsl,
5229 SQDMLSLshh, SQDMLSLdss>;
5231 // Signed Saturating Doubling Multiply Long
5232 defm SQDMULL : NeonI_Scalar3Diff_HS_size<0b0, 0b1101, "sqdmull">;
5233 defm : Neon_Scalar3Diff_HS_size_patterns<int_arm_neon_vqdmull,
5234 SQDMULLshh, SQDMULLdss>;
// Scalar integer <-> floating-point conversion instructions, plus the
// FCVT{N,M,A,P,Z}{S,U} family whose rounding behavior is encoded in the
// mnemonic suffix (see the ARMv8 ARM for the exact rounding semantics).
// NOTE(review): several pattern instantiations in this section appear to have
// lost continuation lines in this copy (the defs ending in a trailing comma
// below) — verify against the upstream file before building.

// Scalar Signed Integer Convert To Floating-point
defm SCVTF : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11101, "scvtf">;
defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns<int_aarch64_neon_vcvtint2fps,

// Scalar Unsigned Integer Convert To Floating-point
defm UCVTF : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11101, "ucvtf">;
defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns<int_aarch64_neon_vcvtint2fpu,

// Scalar Floating-point Converts
def FCVTXN : NeonI_Scalar2SameMisc_fcvtxn_D_size<0b1, 0b10110, "fcvtxn">;
def : Neon_Scalar2SameMisc_fcvtxn_D_size_patterns<int_aarch64_neon_fcvtxn,

defm FCVTNS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11010, "fcvtns">;
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtns,
                                                  FCVTNSss, FCVTNSdd>;
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtns, FCVTNSdd>;

defm FCVTNU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11010, "fcvtnu">;
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtnu,
                                                  FCVTNUss, FCVTNUdd>;
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtnu, FCVTNUdd>;

defm FCVTMS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11011, "fcvtms">;
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtms,
                                                  FCVTMSss, FCVTMSdd>;
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtms, FCVTMSdd>;

defm FCVTMU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11011, "fcvtmu">;
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtmu,
                                                  FCVTMUss, FCVTMUdd>;
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtmu, FCVTMUdd>;

defm FCVTAS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11100, "fcvtas">;
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtas,
                                                  FCVTASss, FCVTASdd>;
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtas, FCVTASdd>;

defm FCVTAU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11100, "fcvtau">;
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtau,
                                                  FCVTAUss, FCVTAUdd>;
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtau, FCVTAUdd>;

defm FCVTPS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11010, "fcvtps">;
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtps,
                                                  FCVTPSss, FCVTPSdd>;
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtps, FCVTPSdd>;

defm FCVTPU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11010, "fcvtpu">;
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtpu,
                                                  FCVTPUss, FCVTPUdd>;
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtpu, FCVTPUdd>;

defm FCVTZS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11011, "fcvtzs">;
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtzs,
                                                  FCVTZSss, FCVTZSdd>;
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_aarch64_neon_vcvtzs,

defm FCVTZU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11011, "fcvtzu">;
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtzu,
                                                  FCVTZUss, FCVTZUdd>;
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_aarch64_neon_vcvtzu,
// Patterns For Convert Instructions Between v1f64 and v1i64
// NOTE(review): the two class headers below appear to have lost their
// `Instruction INST>` parameter line in this copy — verify against upstream.
class Neon_Scalar2SameMisc_cvtf_v1f64_pattern<SDPatternOperator opnode,
  : Pat<(v1f64 (opnode (v1i64 FPR64:$Rn))), (INST FPR64:$Rn)>;

class Neon_Scalar2SameMisc_fcvt_v1f64_pattern<SDPatternOperator opnode,
  : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;

// v1i64 -> v1f64 conversions reuse the scalar SCVTF/UCVTF (double) forms.
def : Neon_Scalar2SameMisc_cvtf_v1f64_pattern<sint_to_fp, SCVTFdd>;
def : Neon_Scalar2SameMisc_cvtf_v1f64_pattern<uint_to_fp, UCVTFdd>;

// v1f64 -> v1i64 conversions reuse the round-toward-zero FCVTZS/FCVTZU forms.
def : Neon_Scalar2SameMisc_fcvt_v1f64_pattern<fp_to_sint, FCVTZSdd>;
def : Neon_Scalar2SameMisc_fcvt_v1f64_pattern<fp_to_uint, FCVTZUdd>;

// Scalar Floating-point Reciprocal Estimate
defm FRECPE : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11101, "frecpe">;
defm : Neon_Scalar2SameMisc_SD_size_patterns<int_aarch64_neon_vrecpe,
                                             FRECPEss, FRECPEdd>;
// NOTE(review): instantiation below appears truncated — verify.
def : Neon_Scalar2SameMisc_V1_D_size_patterns<int_arm_neon_vrecpe,

// Scalar Floating-point Reciprocal Exponent
defm FRECPX : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11111, "frecpx">;
defm : Neon_Scalar2SameMisc_SD_size_patterns<int_aarch64_neon_vrecpx,
                                             FRECPXss, FRECPXdd>;

// Scalar Floating-point Reciprocal Square Root Estimate
defm FRSQRTE: NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11101, "frsqrte">;
defm : Neon_Scalar2SameMisc_SD_size_patterns<int_aarch64_neon_vrsqrte,
                                             FRSQRTEss, FRSQRTEdd>;
// NOTE(review): instantiation below appears truncated — verify.
def : Neon_Scalar2SameMisc_V1_D_size_patterns<int_arm_neon_vrsqrte,

// Scalar Floating-point Round
// Helper: maps a v1f64 rounding operator onto the matching FRINT* (double)
// instruction.
class Neon_ScalarFloatRound_pattern<SDPatternOperator opnode, Instruction INST>
  : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;

def : Neon_ScalarFloatRound_pattern<fceil, FRINTPdd>;
def : Neon_ScalarFloatRound_pattern<ffloor, FRINTMdd>;
def : Neon_ScalarFloatRound_pattern<ftrunc, FRINTZdd>;
def : Neon_ScalarFloatRound_pattern<frint, FRINTXdd>;
def : Neon_ScalarFloatRound_pattern<fnearbyint, FRINTIdd>;
def : Neon_ScalarFloatRound_pattern<frnd, FRINTAdd>;
def : Neon_ScalarFloatRound_pattern<int_aarch64_neon_frintn, FRINTNdd>;
// Scalar Integer Compare
// Each CM* instruction produces an all-ones/all-zeros 64-bit mask result.
// NOTE(review): the class header below and several cmpz pattern
// instantiations appear to have lost continuation lines in this copy —
// verify against upstream.

// Scalar Compare Bitwise Equal
def CMEQddd: NeonI_Scalar3Same_D_size<0b1, 0b10001, "cmeq">;
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vceq, CMEQddd>;

class Neon_Scalar3Same_cmp_D_size_v1_patterns<SDPatternOperator opnode,
  : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm), CC)),
        (INSTD FPR64:$Rn, FPR64:$Rm)>;

def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMEQddd, SETEQ>;

// Scalar Compare Signed Greater Than Or Equal
def CMGEddd: NeonI_Scalar3Same_D_size<0b0, 0b00111, "cmge">;
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vcge, CMGEddd>;
def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMGEddd, SETGE>;

// Scalar Compare Unsigned Higher Or Same
def CMHSddd: NeonI_Scalar3Same_D_size<0b1, 0b00111, "cmhs">;
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vchs, CMHSddd>;
def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMHSddd, SETUGE>;

// Scalar Compare Unsigned Higher
def CMHIddd: NeonI_Scalar3Same_D_size<0b1, 0b00110, "cmhi">;
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vchi, CMHIddd>;
def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMHIddd, SETUGT>;

// Scalar Compare Signed Greater Than
def CMGTddd: NeonI_Scalar3Same_D_size<0b0, 0b00110, "cmgt">;
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vcgt, CMGTddd>;
def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMGTddd, SETGT>;

// Scalar Compare Bitwise Test Bits
def CMTSTddd: NeonI_Scalar3Same_D_size<0b0, 0b10001, "cmtst">;
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vtstd, CMTSTddd>;
defm : Neon_Scalar3Same_D_size_patterns<Neon_tst, CMTSTddd>;

// Scalar Compare Bitwise Equal To Zero
def CMEQddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01001, "cmeq">;
def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vceq,
def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETEQ, CMEQddi>;

// Scalar Compare Signed Greater Than Or Equal To Zero
def CMGEddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b1, 0b01000, "cmge">;
def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcge,
def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETGE, CMGEddi>;

// Scalar Compare Signed Greater Than Zero
def CMGTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01000, "cmgt">;
def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcgt,
def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETGT, CMGTddi>;

// Scalar Compare Signed Less Than Or Equal To Zero
def CMLEddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b1, 0b01001, "cmle">;
def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vclez,
def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETLE, CMLEddi>;

// Scalar Compare Less Than Zero
def CMLTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01010, "cmlt">;
def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcltz,
def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETLT, CMLTddi>;
// Scalar Floating-point Compare
// FCM*/FACM* produce an all-ones/all-zeros integer mask of the element size.

// Scalar Floating-point Compare Mask Equal
defm FCMEQ: NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11100, "fcmeq">;
defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fceq, v1i32, f32,
                                         FCMEQsss, v1i64, f64, FCMEQddd>;
def : Neon_Scalar3Same_cmp_V1_D_size_patterns<SETEQ, FCMEQddd>;

// Scalar Floating-point Compare Mask Equal To Zero
defm FCMEQZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01101, "fcmeq">;
defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fceq, SETEQ,
                                                  FCMEQZssi, FCMEQZddi>;

// Scalar Floating-point Compare Mask Greater Than Or Equal
defm FCMGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11100, "fcmge">;
defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fcge, v1i32, f32,
                                         FCMGEsss, v1i64, f64, FCMGEddd>;
def : Neon_Scalar3Same_cmp_V1_D_size_patterns<SETGE, FCMGEddd>;

// Scalar Floating-point Compare Mask Greater Than Or Equal To Zero
defm FCMGEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01100, "fcmge">;
defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fcge, SETGE,
                                                  FCMGEZssi, FCMGEZddi>;

// Scalar Floating-point Compare Mask Greater Than
defm FCMGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11100, "fcmgt">;
defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fcgt, v1i32, f32,
                                         FCMGTsss, v1i64, f64, FCMGTddd>;
def : Neon_Scalar3Same_cmp_V1_D_size_patterns<SETGT, FCMGTddd>;

// Scalar Floating-point Compare Mask Greater Than Zero
defm FCMGTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01100, "fcmgt">;
defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fcgt, SETGT,
                                                  FCMGTZssi, FCMGTZddi>;

// Scalar Floating-point Compare Mask Less Than Or Equal To Zero
defm FCMLEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01101, "fcmle">;
defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fclez, SETLE,
                                                  FCMLEZssi, FCMLEZddi>;

// Scalar Floating-point Compare Mask Less Than Zero
defm FCMLTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01110, "fcmlt">;
defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fcltz, SETLT,
                                                  FCMLTZssi, FCMLTZddi>;

// Scalar Floating-point Absolute Compare Mask Greater Than Or Equal
defm FACGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11101, "facge">;
defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fcage, v1i32, f32,
                                         FACGEsss, v1i64, f64, FACGEddd>;
// llvm.arm.* compatibility form of the double-precision facge.
def : Pat<(v1i64 (int_arm_neon_vacge (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FACGEddd FPR64:$Rn, FPR64:$Rm)>;

// Scalar Floating-point Absolute Compare Mask Greater Than
defm FACGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11101, "facgt">;
defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fcagt, v1i32, f32,
                                         FACGTsss, v1i64, f64, FACGTddd>;
// llvm.arm.* compatibility form of the double-precision facgt.
def : Pat<(v1i64 (int_arm_neon_vacgt (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FACGTddd FPR64:$Rn, FPR64:$Rm)>;

// Scalar Floating-point Absolute Difference
defm FABD: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11010, "fabd">;
defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_vabd, f32, f32,
                                         FABDsss, f64, f64, FABDddd>;
// Scalar absolute value / negate (plain and saturating), saturating
// accumulate, and saturating extract-narrow.
// NOTE(review): several pattern instantiations in this section appear to have
// lost continuation lines in this copy (defs ending in a trailing comma or
// missing their result) — verify against upstream.

// Scalar Absolute Value
defm ABS : NeonI_Scalar2SameMisc_D_size<0b0, 0b01011, "abs">;
defm : Neon_Scalar2SameMisc_D_size_patterns<int_aarch64_neon_vabs, ABSdd>;

// Scalar Signed Saturating Absolute Value
defm SQABS : NeonI_Scalar2SameMisc_BHSD_size<0b0, 0b00111, "sqabs">;
defm : Neon_Scalar2SameMisc_BHSD_size_patterns<int_arm_neon_vqabs,
                                               SQABSbb, SQABShh, SQABSss, SQABSdd>;

// Scalar Negate
defm NEG : NeonI_Scalar2SameMisc_D_size<0b1, 0b01011, "neg">;
defm : Neon_Scalar2SameMisc_D_size_patterns<int_aarch64_neon_vneg, NEGdd>;

// Scalar Signed Saturating Negate
defm SQNEG : NeonI_Scalar2SameMisc_BHSD_size<0b1, 0b00111, "sqneg">;
defm : Neon_Scalar2SameMisc_BHSD_size_patterns<int_arm_neon_vqneg,
                                               SQNEGbb, SQNEGhh, SQNEGss, SQNEGdd>;

// Scalar Signed Saturating Accumulated of Unsigned Value
defm SUQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b0, 0b00011, "suqadd">;
defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns<int_aarch64_neon_vuqadd,
                                                     SUQADDss, SUQADDdd>;

// Scalar Unsigned Saturating Accumulated of Signed Value
defm USQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b1, 0b00011, "usqadd">;
defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns<int_aarch64_neon_vsqadd,
                                                     USQADDss, USQADDdd>;

// v1i64 forms of the saturating accumulate intrinsics; $Src is the
// accumulator tied to the destination.
def : Pat<(v1i64 (int_aarch64_neon_suqadd (v1i64 FPR64:$Src),
                                          (v1i64 FPR64:$Rn))),
          (SUQADDdd FPR64:$Src, FPR64:$Rn)>;

def : Pat<(v1i64 (int_aarch64_neon_usqadd (v1i64 FPR64:$Src),
                                          (v1i64 FPR64:$Rn))),
          (USQADDdd FPR64:$Src, FPR64:$Rn)>;

def : Pat<(v1i64 (int_arm_neon_vabs (v1i64 FPR64:$Rn))),

def : Pat<(v1i64 (int_arm_neon_vqabs (v1i64 FPR64:$Rn))),
          (SQABSdd FPR64:$Rn)>;

def : Pat<(v1i64 (int_arm_neon_vqneg (v1i64 FPR64:$Rn))),
          (SQNEGdd FPR64:$Rn)>;

// Match (0 - x) on v1i64 as a scalar negate.
def : Pat<(v1i64 (sub (v1i64 (bitconvert (v8i8 Neon_AllZero))),
                      (v1i64 FPR64:$Rn))),

// Scalar Signed Saturating Extract Unsigned Narrow
defm SQXTUN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10010, "sqxtun">;
defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovnsu,

// Scalar Signed Saturating Extract Narrow
defm SQXTN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b0, 0b10100, "sqxtn">;
defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovns,

// Scalar Unsigned Saturating Extract Narrow
defm UQXTN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10100, "uqxtn">;
defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovnu,
// Scalar Reduce Pairwise
// Pairwise-reduce a 2-element vector into a scalar register.
// NOTE(review): the multiclass bodies below appear to have lost their
// pattern/itinerary lines and closing braces in this copy, and some Pat
// definitions are missing continuation lines — verify against upstream.

multiclass NeonI_ScalarPair_D_sizes<bit u, bit size, bits<5> opcode,
                                    string asmop, bit Commutable = 0> {
  let isCommutable = Commutable in {
    // 2D -> D reduction: reads a 128-bit vector, writes a 64-bit scalar.
    def _D_2D : NeonI_ScalarPair<u, {size, 0b1}, opcode,
                                 (outs FPR64:$Rd), (ins VPR128:$Rn),
                                 !strconcat(asmop, "\t$Rd, $Rn.2d"),

multiclass NeonI_ScalarPair_SD_sizes<bit u, bit size, bits<5> opcode,
                                     string asmop, bit Commutable = 0>
  : NeonI_ScalarPair_D_sizes<u, size, opcode, asmop, Commutable> {
  let isCommutable = Commutable in {
    // 2S -> S reduction: reads a 64-bit vector, writes a 32-bit scalar.
    def _S_2S : NeonI_ScalarPair<u, {size, 0b0}, opcode,
                                 (outs FPR32:$Rd), (ins VPR64:$Rn),
                                 !strconcat(asmop, "\t$Rd, $Rn.2s"),

// Scalar Reduce Addition Pairwise (Integer) with
// Pattern to match llvm.arm.* intrinsic
defm ADDPvv : NeonI_ScalarPair_D_sizes<0b0, 0b1, 0b11011, "addp", 0>;

// Pattern to match llvm.aarch64.* intrinsic for
// Scalar Reduce Addition Pairwise (Integer)
def : Pat<(v1i64 (int_aarch64_neon_vpadd (v2i64 VPR128:$Rn))),
          (ADDPvv_D_2D VPR128:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_vaddv (v2i64 VPR128:$Rn))),
          (ADDPvv_D_2D VPR128:$Rn)>;

// Scalar Reduce Addition Pairwise (Floating Point)
defm FADDPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01101, "faddp", 0>;

// Scalar Reduce Maximum Pairwise (Floating Point)
defm FMAXPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01111, "fmaxp", 0>;

// Scalar Reduce Minimum Pairwise (Floating Point)
defm FMINPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01111, "fminp", 0>;

// Scalar Reduce maxNum Pairwise (Floating Point)
defm FMAXNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01100, "fmaxnmp", 0>;

// Scalar Reduce minNum Pairwise (Floating Point)
defm FMINNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01100, "fminnmp", 0>;

multiclass Neon_ScalarPair_SD_size_patterns<SDPatternOperator opnode,
                                            Instruction INSTD> {
  def : Pat<(f32 (opnode (v2f32 VPR64:$Rn))),
  def : Pat<(f64 (opnode (v2f64 VPR128:$Rn))),
            (INSTD VPR128:$Rn)>;

// Patterns to match llvm.aarch64.* intrinsic for
// Scalar Reduce Add, Max, Min, MaxNum, MinNum Pairwise (Floating Point)
defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfadd,
                                        FADDPvv_S_2S, FADDPvv_D_2D>;

defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmax,
                                        FMAXPvv_S_2S, FMAXPvv_D_2D>;

defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmin,
                                        FMINPvv_S_2S, FMINPvv_D_2D>;

defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfmaxnm,
                                        FMAXNMPvv_S_2S, FMAXNMPvv_D_2D>;

defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfminnm,
                                        FMINNMPvv_S_2S, FMINNMPvv_D_2D>;

// 4S add-reduction: pairwise-add the vector with itself first, then reduce.
def : Pat<(f32 (int_aarch64_neon_vpfadd (v4f32 VPR128:$Rn))),
          (FADDPvv_S_2S (v2f32
            (v4f32 (FADDP_4S (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rn))),
// Scalar by element Arithmetic
// NOTE(review): the two base classes and the instruction defs below appear to
// have lost lines (outs lists, pattern/itinerary lines, closing braces) in
// this copy — verify against upstream.

// Base class for scalar-by-(vector lane) arithmetic,
// e.g. "fmul Sd, Sn, Vm.s[idx]". $Imm selects the lane of $MRm.
class NeonI_ScalarXIndexedElemArith<string asmop, bits<4> opcode,
                                    string rmlane, bit u, bit szhi, bit szlo,
                                    RegisterClass ResFPR, RegisterClass OpFPR,
                                    RegisterOperand OpVPR, Operand OpImm>
  : NeonI_ScalarXIndexedElem<u, szhi, szlo, opcode,
                             (ins OpFPR:$Rn, OpVPR:$MRm, OpImm:$Imm),
                             asmop # "\t$Rd, $Rn, $MRm" # rmlane # "[$Imm]",

// As above, but with an accumulator input tied to the destination
// ("$src = $Rd"), used by fmla/fmls/sqdmlal/sqdmlsl.
class NeonI_ScalarXIndexedElemArith_Constraint_Impl<string asmop, bits<4> opcode,
                                                    bit u, bit szhi, bit szlo,
                                                    RegisterClass ResFPR,
                                                    RegisterClass OpFPR,
                                                    RegisterOperand OpVPR,
  : NeonI_ScalarXIndexedElem<u, szhi, szlo, opcode,
                             (ins ResFPR:$src, OpFPR:$Rn, OpVPR:$MRm, OpImm:$Imm),
                             asmop # "\t$Rd, $Rn, $MRm" # rmlane # "[$Imm]",
  let Constraints = "$src = $Rd";

// Scalar Floating Point multiply (scalar, by element)
def FMULssv_4S : NeonI_ScalarXIndexedElemArith<"fmul",
  0b1001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
  let Inst{11} = Imm{1}; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
def FMULddv_2D : NeonI_ScalarXIndexedElemArith<"fmul",
  0b1001, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
  let Inst{11} = Imm{0}; // h
  let Inst{21} = 0b0; // l
  let Inst{20-16} = MRm;

// Scalar Floating Point multiply extended (scalar, by element)
def FMULXssv_4S : NeonI_ScalarXIndexedElemArith<"fmulx",
  0b1001, ".s", 0b1, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
  let Inst{11} = Imm{1}; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
def FMULXddv_2D : NeonI_ScalarXIndexedElemArith<"fmulx",
  0b1001, ".d", 0b1, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
  let Inst{11} = Imm{0}; // h
  let Inst{21} = 0b0; // l
  let Inst{20-16} = MRm;
// Patterns for scalar FMUL/FMULX where one operand is a lane of a 128-bit
// vector (used directly) or of a 64-bit vector (widened via SUBREG_TO_REG),
// in both operand orders.
// NOTE(review): this multiclass appears to have lost lines (the Instruction
// parameter, several pattern operands and closing braces) in this copy —
// verify against upstream.
multiclass Neon_ScalarXIndexedElem_MUL_MULX_Patterns<
  SDPatternOperator opnode,
  ValueType ResTy, RegisterClass FPRC, ValueType OpTy, Operand OpImm,
  ValueType OpNTy, ValueType ExTy, Operand OpNImm> {

  def : Pat<(ResTy (opnode (ResTy FPRC:$Rn),
              (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)))),
            (ResTy (INST (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;

  def : Pat<(ResTy (opnode (ResTy FPRC:$Rn),
              (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)))),
            (ResTy (INST (ResTy FPRC:$Rn),
              (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),

  // swapped operands
  def : Pat<(ResTy (opnode
              (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)),
            (ResTy (INST (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;

  def : Pat<(ResTy (opnode
              (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)),
            (ResTy (INST (ResTy FPRC:$Rn),
              (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),

// Patterns for Scalar Floating Point multiply (scalar, by element)
defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<fmul, FMULssv_4S,
  f32, FPR32, v4f32, neon_uimm2_bare, v2f32, v4f32, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<fmul, FMULddv_2D,
  f64, FPR64, v2f64, neon_uimm1_bare, v1f64, v2f64, neon_uimm0_bare>;

// Patterns for Scalar Floating Point multiply extended (scalar, by element)
defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<int_aarch64_neon_vmulx,
  FMULXssv_4S, f32, FPR32, v4f32, neon_uimm2_bare,
  v2f32, v4f32, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<int_aarch64_neon_vmulx,
  FMULXddv_2D, f64, FPR64, v2f64, neon_uimm1_bare,
  v1f64, v2f64, neon_uimm0_bare>;
// Scalar Floating Point fused multiply-add (scalar, by element)
// NOTE(review): the defs below appear to have lost their closing braces in
// this copy — verify against upstream.
def FMLAssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla",
  0b0001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
  let Inst{11} = Imm{1}; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
def FMLAddv_2D : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla",
  0b0001, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
  let Inst{11} = Imm{0}; // h
  let Inst{21} = 0b0; // l
  let Inst{20-16} = MRm;

// Scalar Floating Point fused multiply-subtract (scalar, by element)
def FMLSssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmls",
  0b0101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
  let Inst{11} = Imm{1}; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
def FMLSddv_2D : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmls",
  0b0101, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
  let Inst{11} = Imm{0}; // h
  let Inst{21} = 0b0; // l
  let Inst{20-16} = MRm;
// We are allowed to match the fma instruction regardless of compile options.
// Patterns mapping scalar fma/fma-with-fneg onto FMLA/FMLS by-element forms,
// for lanes of 128-bit vectors (direct) and 64-bit vectors (widened via
// SUBREG_TO_REG), in both multiplicand orders.
// NOTE(review): this multiclass appears to have lost several pattern operand
// lines and its closing braces in this copy — verify against upstream.
multiclass Neon_ScalarXIndexedElem_FMA_Patterns<
  Instruction FMLAI, Instruction FMLSI,
  ValueType ResTy, RegisterClass FPRC, ValueType OpTy, Operand OpImm,
  ValueType OpNTy, ValueType ExTy, Operand OpNImm> {

  def : Pat<(ResTy (fma (ResTy FPRC:$Rn),
              (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)),
            (ResTy (FMLAI (ResTy FPRC:$Ra),
              (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;

  def : Pat<(ResTy (fma (ResTy FPRC:$Rn),
              (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)),
            (ResTy (FMLAI (ResTy FPRC:$Ra),
              (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),

  // swapped fmla operands
  def : Pat<(ResTy (fma
              (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)),
            (ResTy (FMLAI (ResTy FPRC:$Ra),
              (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;

  def : Pat<(ResTy (fma
              (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)),
            (ResTy (FMLAI (ResTy FPRC:$Ra),
              (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),

  // fma with a negated multiplicand selects fmls.
  def : Pat<(ResTy (fma (ResTy FPRC:$Rn),
              (fneg (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm))),
            (ResTy (FMLSI (ResTy FPRC:$Ra),
              (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;

  def : Pat<(ResTy (fma (ResTy FPRC:$Rn),
              (fneg (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm))),
            (ResTy (FMLSI (ResTy FPRC:$Ra),
              (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),

  // swapped fmls operands
  def : Pat<(ResTy (fma
              (fneg (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm))),
            (ResTy (FMLSI (ResTy FPRC:$Ra),
              (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;

  def : Pat<(ResTy (fma
              (fneg (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm))),
            (ResTy (FMLSI (ResTy FPRC:$Ra),
              (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
// Scalar Floating Point fused multiply-add and
// multiply-subtract (scalar, by element)
// One instantiation per element size: f32 lanes of a v4f32 (with v2f32
// narrow-vector forms), and f64 lanes of a v2f64 (with v1f64 forms).
// NOTE(review): the f64 instantiation was previously duplicated verbatim,
// emitting every FMLAddv_2D/FMLSddv_2D selection pattern twice; the
// redundant copy has been removed.
defm : Neon_ScalarXIndexedElem_FMA_Patterns<FMLAssv_4S, FMLSssv_4S,
  f32, FPR32, v4f32, neon_uimm2_bare, v2f32, v4f32, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_FMA_Patterns<FMLAddv_2D, FMLSddv_2D,
  f64, FPR64, v2f64, neon_uimm1_bare, v1f64, v2f64, neon_uimm0_bare>;
// Scalar Signed saturating doubling multiply long (scalar, by element)
// NOTE(review): the instruction defs below appear to have lost their closing
// braces, and the multiclass has lost its Instruction parameter, some pattern
// operand lines and closing braces in this copy — verify against upstream.
def SQDMULLshv_4H : NeonI_ScalarXIndexedElemArith<"sqdmull",
  0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
  let Inst{11} = 0b0; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
def SQDMULLshv_8H : NeonI_ScalarXIndexedElemArith<"sqdmull",
  0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
  let Inst{11} = Imm{2}; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
def SQDMULLdsv_2S : NeonI_ScalarXIndexedElemArith<"sqdmull",
  0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
  let Inst{11} = 0b0; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
def SQDMULLdsv_4S : NeonI_ScalarXIndexedElemArith<"sqdmull",
  0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
  let Inst{11} = Imm{1}; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;

// Patterns for a scalar op whose second operand is a vector lane, matched
// both through scalar_to_vector/vector_extract and extract_subvector forms,
// in both operand orders.
multiclass Neon_ScalarXIndexedElem_MUL_Patterns<
  SDPatternOperator opnode,
  ValueType ResTy, RegisterClass FPRC,
  ValueType OpVTy, ValueType OpTy,
  ValueType VecOpTy, ValueType ExTy, RegisterOperand VPRC, Operand OpImm> {

  def : Pat<(ResTy (opnode (OpVTy FPRC:$Rn),
              (OpVTy (scalar_to_vector
                (ExTy (vector_extract (VecOpTy VPRC:$MRm), OpImm:$Imm)))))),
            (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>;

  def : Pat<(ResTy (opnode (OpVTy FPRC:$Rn),
              (OpVTy (extract_subvector (VecOpTy VPRC:$MRm), OpImm:$Imm)))),
            (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>;

  // swapped operands
  def : Pat<(ResTy (opnode
              (OpVTy (scalar_to_vector
                (ExTy (vector_extract (VecOpTy VPRC:$MRm), OpImm:$Imm)))),
            (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>;

  def : Pat<(ResTy (opnode
              (OpVTy (extract_subvector (VecOpTy VPRC:$MRm), OpImm:$Imm)),
            (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>;

// Patterns for Scalar Signed saturating
// doubling multiply long (scalar, by element)
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull,
  SQDMULLshv_4H, v1i32, FPR16, v1i16, i16, v4i16,
  i32, VPR64Lo, neon_uimm2_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull,
  SQDMULLshv_8H, v1i32, FPR16, v1i16, i16, v8i16,
  i32, VPR128Lo, neon_uimm3_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull,
  SQDMULLdsv_2S, v1i64, FPR32, v1i32, i32, v2i32,
  i32, VPR64Lo, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull,
  SQDMULLdsv_4S, v1i64, FPR32, v1i32, i32, v4i32,
  i32, VPR128Lo, neon_uimm2_bare>;
// Scalar Signed saturating doubling multiply-add long (scalar, by element)
// NOTE(review): the defs below appear to have lost their closing braces in
// this copy — verify against upstream.
def SQDMLALshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
  0b0011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
  let Inst{11} = 0b0; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
def SQDMLALshv_8H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
  0b0011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
  let Inst{11} = Imm{2}; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
def SQDMLALdsv_2S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
  0b0011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
  let Inst{11} = 0b0; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
def SQDMLALdsv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
  0b0011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
  let Inst{11} = Imm{1}; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;

// Scalar Signed saturating doubling
// multiply-subtract long (scalar, by element)
def SQDMLSLshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
  0b0111, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
  let Inst{11} = 0b0; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
def SQDMLSLshv_8H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
  0b0111, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
  let Inst{11} = Imm{2}; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
def SQDMLSLdsv_2S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
  0b0111, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
  let Inst{11} = 0b0; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
def SQDMLSLdsv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
  0b0111, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
  let Inst{11} = Imm{1}; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
// Patterns combining a saturating add/sub (opnode) with a by-element core
// multiply (coreopnode) into a single multiply-accumulate instruction, with
// the lane operand matched through scalar_to_vector/vector_extract and
// extract_subvector forms, in both multiplicand orders.
// NOTE(review): this multiclass appears to have lost lines (parameters,
// a coreopnode line, closing braces) in this copy — verify against upstream.
multiclass Neon_ScalarXIndexedElem_MLAL_Patterns<
  SDPatternOperator opnode,
  SDPatternOperator coreopnode,
  ValueType ResTy, RegisterClass ResFPRC, RegisterClass FPRC,
  ValueType OpVTy, ValueType ExTy, RegisterOperand VPRC, Operand OpImm> {

  def : Pat<(ResTy (opnode
              (ResTy ResFPRC:$Ra),
              (ResTy (coreopnode (OpTy FPRC:$Rn),
                (OpTy (scalar_to_vector
                  (ExTy (vector_extract (OpVTy VPRC:$MRm), OpImm:$Imm)))))))),
            (ResTy (INST (ResTy ResFPRC:$Ra),
              (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>;

  def : Pat<(ResTy (opnode
              (ResTy ResFPRC:$Ra),
              (ResTy (coreopnode (OpTy FPRC:$Rn),
                (OpTy (extract_subvector (OpVTy VPRC:$MRm), OpImm:$Imm)))))),
            (ResTy (INST (ResTy ResFPRC:$Ra),
              (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>;

  // swapped multiplicand orders
  def : Pat<(ResTy (opnode
              (ResTy ResFPRC:$Ra),
                (OpTy (scalar_to_vector
                  (ExTy (vector_extract (OpVTy VPRC:$MRm), OpImm:$Imm)))),
                (OpTy FPRC:$Rn))))),
            (ResTy (INST (ResTy ResFPRC:$Ra),
              (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>;

  def : Pat<(ResTy (opnode
              (ResTy ResFPRC:$Ra),
                (OpTy (extract_subvector (OpVTy VPRC:$MRm), OpImm:$Imm)),
                (OpTy FPRC:$Rn))))),
            (ResTy (INST (ResTy ResFPRC:$Ra),
              (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>;

// Patterns for Scalar Signed saturating
// doubling multiply-add long (scalar, by element)
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
  int_arm_neon_vqdmull, SQDMLALshv_4H, v1i32, FPR32, FPR16, v1i16, v4i16,
  i32, VPR64Lo, neon_uimm2_bare>;
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
  int_arm_neon_vqdmull, SQDMLALshv_8H, v1i32, FPR32, FPR16, v1i16, v8i16,
  i32, VPR128Lo, neon_uimm3_bare>;
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
  int_arm_neon_vqdmull, SQDMLALdsv_2S, v1i64, FPR64, FPR32, v1i32, v2i32,
  i32, VPR64Lo, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
  int_arm_neon_vqdmull, SQDMLALdsv_4S, v1i64, FPR64, FPR32, v1i32, v4i32,
  i32, VPR128Lo, neon_uimm2_bare>;

// Patterns for Scalar Signed saturating
// doubling multiply-sub long (scalar, by element)
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
  int_arm_neon_vqdmull, SQDMLSLshv_4H, v1i32, FPR32, FPR16, v1i16, v4i16,
  i32, VPR64Lo, neon_uimm2_bare>;
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
  int_arm_neon_vqdmull, SQDMLSLshv_8H, v1i32, FPR32, FPR16, v1i16, v8i16,
  i32, VPR128Lo, neon_uimm3_bare>;
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
  int_arm_neon_vqdmull, SQDMLSLdsv_2S, v1i64, FPR64, FPR32, v1i32, v2i32,
  i32, VPR64Lo, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
  int_arm_neon_vqdmull, SQDMLSLdsv_4S, v1i64, FPR64, FPR32, v1i32, v4i32,
  i32, VPR128Lo, neon_uimm2_bare>;
// Scalar Signed saturating doubling multiply returning
// high half (scalar, by element)
// NOTE(review): the defs below appear to have lost their closing braces in
// this copy — verify against upstream.
def SQDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqdmulh",
  0b1100, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR64Lo, neon_uimm2_bare> {
  let Inst{11} = 0b0; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
def SQDMULHhhv_8H : NeonI_ScalarXIndexedElemArith<"sqdmulh",
  0b1100, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR128Lo, neon_uimm3_bare> {
  let Inst{11} = Imm{2}; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
def SQDMULHssv_2S : NeonI_ScalarXIndexedElemArith<"sqdmulh",
  0b1100, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR64, neon_uimm1_bare> {
  let Inst{11} = 0b0; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
def SQDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqdmulh",
  0b1100, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
  let Inst{11} = Imm{1}; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;

// Patterns for Scalar Signed saturating doubling multiply returning
// high half (scalar, by element)
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
  SQDMULHhhv_4H, v1i16, FPR16, v1i16, i16, v4i16,
  i32, VPR64Lo, neon_uimm2_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
  SQDMULHhhv_8H, v1i16, FPR16, v1i16, i16, v8i16,
  i32, VPR128Lo, neon_uimm3_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
  SQDMULHssv_2S, v1i32, FPR32, v1i32, i32, v2i32,
  i32, VPR64Lo, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
  SQDMULHssv_4S, v1i32, FPR32, v1i32, i32, v4i32,
  i32, VPR128Lo, neon_uimm2_bare>;
6092 // Scalar Signed saturating rounding doubling multiply
6093 // returning high half (scalar, by element)
// Identical structure to the SQDMULH group, only the opcode field differs
// (0b1101 = rounding form).  Lane index packs into h:l:m for .h and h:l
// for .s, per the per-bit comments below.
6094 def SQRDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
6095 0b1101, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR64Lo, neon_uimm2_bare> {
6096 let Inst{11} = 0b0; // h
6097 let Inst{21} = Imm{1}; // l
6098 let Inst{20} = Imm{0}; // m
6099 let Inst{19-16} = MRm{3-0};
6101 def SQRDMULHhhv_8H : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
6102 0b1101, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR128Lo, neon_uimm3_bare> {
6103 let Inst{11} = Imm{2}; // h
6104 let Inst{21} = Imm{1}; // l
6105 let Inst{20} = Imm{0}; // m
6106 let Inst{19-16} = MRm{3-0};
6108 def SQRDMULHssv_2S : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
6109 0b1101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR64, neon_uimm1_bare> {
6110 let Inst{11} = 0b0; // h
6111 let Inst{21} = Imm{0}; // l
6112 let Inst{20-16} = MRm;
6114 def SQRDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
6115 0b1101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
6116 let Inst{11} = Imm{1}; // h
6117 let Inst{21} = Imm{0}; // l
6118 let Inst{20-16} = MRm;
// Selection patterns mapping the vqrdmulh intrinsic onto the SQRDMULH
// by-element instructions above.
6121 defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
6122 SQRDMULHhhv_4H, v1i16, FPR16, v1i16, i16, v4i16, i32,
6123 VPR64Lo, neon_uimm2_bare>;
6124 defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
6125 SQRDMULHhhv_8H, v1i16, FPR16, v1i16, i16, v8i16, i32,
6126 VPR128Lo, neon_uimm3_bare>;
6127 defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
6128 SQRDMULHssv_2S, v1i32, FPR32, v1i32, i32, v2i32, i32,
6129 VPR64Lo, neon_uimm1_bare>;
6130 defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
6131 SQRDMULHssv_4S, v1i32, FPR32, v1i32, i32, v4i32, i32,
6132 VPR128Lo, neon_uimm2_bare>;
6134 // Scalar general arithmetic operation
// Pattern classes mapping unary / binary / ternary v1f64 DAG operators onto
// the corresponding scalar FP instruction (the INST class parameter — its
// declaration line is not visible in this extract).
6135 class Neon_Scalar_GeneralMath2D_pattern<SDPatternOperator opnode,
6137 : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;
6139 class Neon_Scalar_GeneralMath3D_pattern<SDPatternOperator opnode,
6141 : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
6142 (INST FPR64:$Rn, FPR64:$Rm)>;
6144 class Neon_Scalar_GeneralMath4D_pattern<SDPatternOperator opnode,
6146 : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm),
6147 (v1f64 FPR64:$Ra))),
6148 (INST FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
// v1f64 arithmetic reuses the plain f64 scalar FP instructions.
6150 def : Neon_Scalar_GeneralMath3D_pattern<fadd, FADDddd>;
6151 def : Neon_Scalar_GeneralMath3D_pattern<fmul, FMULddd>;
6152 def : Neon_Scalar_GeneralMath3D_pattern<fsub, FSUBddd>;
6153 def : Neon_Scalar_GeneralMath3D_pattern<fdiv, FDIVddd>;
6154 def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vabds, FABDddd>;
6155 def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vmaxs, FMAXddd>;
6156 def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vmins, FMINddd>;
6157 def : Neon_Scalar_GeneralMath3D_pattern<int_aarch64_neon_vmaxnm, FMAXNMddd>;
6158 def : Neon_Scalar_GeneralMath3D_pattern<int_aarch64_neon_vminnm, FMINNMddd>;
6160 def : Neon_Scalar_GeneralMath2D_pattern<fabs, FABSdd>;
6161 def : Neon_Scalar_GeneralMath2D_pattern<fneg, FNEGdd>;
6163 def : Neon_Scalar_GeneralMath4D_pattern<fma, FMADDdddd>;
6164 def : Neon_Scalar_GeneralMath4D_pattern<fmsub, FMSUBdddd>;
6166 // Scalar Copy - DUP element to scalar
// Base class for DUP (element to scalar): copies lane $Imm of a 128-bit
// vector $Rn into scalar register $Rd.
6167 class NeonI_Scalar_DUP<string asmop, string asmlane,
6168 RegisterClass ResRC, RegisterOperand VPRC,
6170 : NeonI_ScalarCopy<(outs ResRC:$Rd), (ins VPRC:$Rn, OpImm:$Imm),
6171 asmop # "\t$Rd, $Rn." # asmlane # "[$Imm]",
// imm5 (Inst{20-16}) encodes element size by the position of the lowest set
// bit, with the lane index in the bits above it (b: xxxx1, h: xxx10,
// s: xx100, d: x1000).
6177 def DUPbv_B : NeonI_Scalar_DUP<"dup", "b", FPR8, VPR128, neon_uimm4_bare> {
6178 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
6180 def DUPhv_H : NeonI_Scalar_DUP<"dup", "h", FPR16, VPR128, neon_uimm3_bare> {
6181 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
6183 def DUPsv_S : NeonI_Scalar_DUP<"dup", "s", FPR32, VPR128, neon_uimm2_bare> {
6184 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
6186 def DUPdv_D : NeonI_Scalar_DUP<"dup", "d", FPR64, VPR128, neon_uimm1_bare> {
6187 let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
// FP lane extraction: lane 0 is a free subregister copy; any other lane is
// a scalar DUP from that lane.
6190 def : Pat<(f32 (vector_extract (v4f32 VPR128:$Rn), 0)),
6191 (f32 (EXTRACT_SUBREG (v4f32 VPR128:$Rn), sub_32))>;
6192 def : Pat<(f32 (vector_extract (v4f32 VPR128:$Rn), 1)),
6193 (f32 (DUPsv_S (v4f32 VPR128:$Rn), 1))>;
6194 def : Pat<(f32 (vector_extract (v4f32 VPR128:$Rn), 2)),
6195 (f32 (DUPsv_S (v4f32 VPR128:$Rn), 2))>;
6196 def : Pat<(f32 (vector_extract (v4f32 VPR128:$Rn), 3)),
6197 (f32 (DUPsv_S (v4f32 VPR128:$Rn), 3))>;
6199 def : Pat<(f64 (vector_extract (v2f64 VPR128:$Rn), 0)),
6200 (f64 (EXTRACT_SUBREG (v2f64 VPR128:$Rn), sub_64))>;
6201 def : Pat<(f64 (vector_extract (v2f64 VPR128:$Rn), 1)),
6202 (f64 (DUPdv_D (v2f64 VPR128:$Rn), 1))>;
// 64-bit source vectors are first widened to 128 bits (SUBREG_TO_REG) so
// the 128-bit-only DUP instruction can be used.
6204 def : Pat<(f32 (vector_extract (v2f32 VPR64:$Rn), 0)),
6205 (f32 (EXTRACT_SUBREG (v2f32 VPR64:$Rn), sub_32))>;
6206 def : Pat<(f32 (vector_extract (v2f32 VPR64:$Rn), 1)),
6207 (f32 (DUPsv_S (v4f32 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
6210 def : Pat<(f64 (vector_extract (v1f64 VPR64:$Rn), 0)),
6211 (f64 (EXTRACT_SUBREG (v1f64 VPR64:$Rn), sub_64))>;
// Select extract_subvector of a v1ix result as a scalar DUP: directly from
// a 128-bit source, or after widening a 64-bit source with SUBREG_TO_REG.
6213 multiclass NeonI_Scalar_DUP_Ext_Vec_pattern<Instruction DUPI,
6214 ValueType ResTy, ValueType OpTy,Operand OpLImm,
6215 ValueType NOpTy, ValueType ExTy, Operand OpNImm> {
6217 def : Pat<(ResTy (extract_subvector (OpTy VPR128:$Rn), OpLImm:$Imm)),
6218 (ResTy (DUPI VPR128:$Rn, OpLImm:$Imm))>;
6220 def : Pat<(ResTy (extract_subvector (NOpTy VPR64:$Rn), OpNImm:$Imm)),
6222 (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
6226 // Patterns for extract subvectors of v1ix data using scalar DUP instructions.
6227 defm : NeonI_Scalar_DUP_Ext_Vec_pattern<DUPbv_B, v1i8, v16i8, neon_uimm4_bare,
6228 v8i8, v16i8, neon_uimm3_bare>;
6229 defm : NeonI_Scalar_DUP_Ext_Vec_pattern<DUPhv_H, v1i16, v8i16, neon_uimm3_bare,
6230 v4i16, v8i16, neon_uimm2_bare>;
6231 defm : NeonI_Scalar_DUP_Ext_Vec_pattern<DUPsv_S, v1i32, v4i32, neon_uimm2_bare,
6232 v2i32, v4i32, neon_uimm1_bare>;
// pattern1: insert of an extracted lane into lane 0 of an undef vector
// becomes a scalar DUP of that lane.
6234 multiclass NeonI_Scalar_DUP_Copy_pattern1<Instruction DUPI, ValueType ResTy,
6235 ValueType OpTy, ValueType ElemTy,
6236 Operand OpImm, ValueType OpNTy,
6237 ValueType ExTy, Operand OpNImm> {
6239 def : Pat<(ResTy (vector_insert (ResTy undef),
6240 (ElemTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)),
6241 (neon_uimm0_bare:$Imm))),
6242 (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>;
// 64-bit source: widen with SUBREG_TO_REG first, then DUP.
6244 def : Pat<(ResTy (vector_insert (ResTy undef),
6245 (ElemTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)),
6248 (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
// pattern2: same selection for the scalar_to_vector form of the same copy.
6252 multiclass NeonI_Scalar_DUP_Copy_pattern2<Instruction DUPI, ValueType ResTy,
6253 ValueType OpTy, ValueType ElemTy,
6254 Operand OpImm, ValueType OpNTy,
6255 ValueType ExTy, Operand OpNImm> {
6257 def : Pat<(ResTy (scalar_to_vector
6258 (ElemTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)))),
6259 (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>;
6261 def : Pat<(ResTy (scalar_to_vector
6262 (ElemTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)))),
6264 (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
6268 // Patterns for vector copy to v1ix and v1fx vectors using scalar DUP
// One instantiation per element size for each of the two pattern shapes.
6270 defm : NeonI_Scalar_DUP_Copy_pattern1<DUPdv_D,
6271 v1i64, v2i64, i64, neon_uimm1_bare,
6272 v1i64, v2i64, neon_uimm0_bare>;
6273 defm : NeonI_Scalar_DUP_Copy_pattern1<DUPsv_S,
6274 v1i32, v4i32, i32, neon_uimm2_bare,
6275 v2i32, v4i32, neon_uimm1_bare>;
6276 defm : NeonI_Scalar_DUP_Copy_pattern1<DUPhv_H,
6277 v1i16, v8i16, i32, neon_uimm3_bare,
6278 v4i16, v8i16, neon_uimm2_bare>;
6279 defm : NeonI_Scalar_DUP_Copy_pattern1<DUPbv_B,
6280 v1i8, v16i8, i32, neon_uimm4_bare,
6281 v8i8, v16i8, neon_uimm3_bare>;
6282 defm : NeonI_Scalar_DUP_Copy_pattern2<DUPdv_D,
6283 v1i64, v2i64, i64, neon_uimm1_bare,
6284 v1i64, v2i64, neon_uimm0_bare>;
6285 defm : NeonI_Scalar_DUP_Copy_pattern2<DUPsv_S,
6286 v1i32, v4i32, i32, neon_uimm2_bare,
6287 v2i32, v4i32, neon_uimm1_bare>;
6288 defm : NeonI_Scalar_DUP_Copy_pattern2<DUPhv_H,
6289 v1i16, v8i16, i32, neon_uimm3_bare,
6290 v4i16, v8i16, neon_uimm2_bare>;
6291 defm : NeonI_Scalar_DUP_Copy_pattern2<DUPbv_B,
6292 v1i8, v16i8, i32, neon_uimm4_bare,
6293 v8i8, v16i8, neon_uimm3_bare>;
// Assembler alias: "mov Rd, Rn.<lane>[Imm]" for the scalar DUP instructions
// (parse-only; the 0b0 flag suppresses printing the alias form).
6295 multiclass NeonI_Scalar_DUP_alias<string asmop, string asmlane,
6296 Instruction DUPI, Operand OpImm,
6297 RegisterClass ResRC> {
6298 def : NeonInstAlias<!strconcat(asmop, "$Rd, $Rn" # asmlane # "[$Imm]"),
6299 (DUPI ResRC:$Rd, VPR128:$Rn, OpImm:$Imm), 0b0>;
6302 // Aliases for Scalar copy - DUP element (scalar)
6303 // FIXME: This is actually the preferred syntax but TableGen can't deal with
6304 // custom printing of aliases.
6305 defm : NeonI_Scalar_DUP_alias<"mov", ".b", DUPbv_B, neon_uimm4_bare, FPR8>;
6306 defm : NeonI_Scalar_DUP_alias<"mov", ".h", DUPhv_H, neon_uimm3_bare, FPR16>;
6307 defm : NeonI_Scalar_DUP_alias<"mov", ".s", DUPsv_S, neon_uimm2_bare, FPR32>;
6308 defm : NeonI_Scalar_DUP_alias<"mov", ".d", DUPdv_D, neon_uimm1_bare, FPR64>;
// Select half-vector extraction: the low half is a free sub_64 subregister
// copy, the high half is a DUP of 64-bit lane 1.
6310 multiclass NeonI_SDUP<PatFrag GetLow, PatFrag GetHigh, ValueType ResTy,
6312 def : Pat<(ResTy (GetLow VPR128:$Rn)),
6313 (ResTy (EXTRACT_SUBREG (OpTy VPR128:$Rn), sub_64))>;
6314 def : Pat<(ResTy (GetHigh VPR128:$Rn)),
6315 (ResTy (DUPdv_D (OpTy VPR128:$Rn), 1))>;
// One instantiation per 128-bit element arrangement.
6318 defm : NeonI_SDUP<Neon_Low16B, Neon_High16B, v8i8, v16i8>;
6319 defm : NeonI_SDUP<Neon_Low8H, Neon_High8H, v4i16, v8i16>;
6320 defm : NeonI_SDUP<Neon_Low4S, Neon_High4S, v2i32, v4i32>;
6321 defm : NeonI_SDUP<Neon_Low2D, Neon_High2D, v1i64, v2i64>;
6322 defm : NeonI_SDUP<Neon_Low4float, Neon_High4float, v2f32, v4f32>;
6323 defm : NeonI_SDUP<Neon_Low2double, Neon_High2double, v1f64, v2f64>;
6325 // The following is for sext/zext from v1xx to v1xx
// Widen a v1 scalar-in-vector by one step: promote to the full 64-bit
// vector, shift-left-long by 0 (USHLL/SSHLL via the prefix string), then
// the result's low lane holds the extended value.
6326 multiclass NeonI_ext<string prefix, SDNode ExtOp> {
6328 def : Pat<(v1i64 (ExtOp (v1i32 FPR32:$Rn))),
6330 (v2i64 (!cast<Instruction>(prefix # "_2S")
6331 (v2i32 (SUBREG_TO_REG (i64 0), $Rn, sub_32)), 0)),
6335 def : Pat<(v1i32 (ExtOp (v1i16 FPR16:$Rn))),
6337 (v4i32 (!cast<Instruction>(prefix # "_4H")
6338 (v4i16 (SUBREG_TO_REG (i64 0), $Rn, sub_16)), 0)),
6342 def : Pat<(v1i16 (ExtOp (v1i8 FPR8:$Rn))),
6344 (v8i16 (!cast<Instruction>(prefix # "_8B")
6345 (v8i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)), 0)),
// zext uses unsigned shift-left-long, sext the signed form.
6349 defm NeonI_zext : NeonI_ext<"USHLLvvi", zext>;
6350 defm NeonI_sext : NeonI_ext<"SSHLLvvi", sext>;
// Multi-step v1 extensions (two widening steps).  NOTE(review): some
// interior lines of these patterns (the instruction names being invoked)
// are not visible in this extract; verify against the complete file.
6352 // zext v1i8 -> v1i32
6353 def : Pat<(v1i32 (zext (v1i8 FPR8:$Rn))),
6354 (v1i32 (EXTRACT_SUBREG
6355 (v1i64 (SUBREG_TO_REG (i64 0),
6357 (v16i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)),
6362 // zext v1i8 -> v1i64
6363 def : Pat<(v1i64 (zext (v1i8 FPR8:$Rn))),
6364 (v1i64 (SUBREG_TO_REG (i64 0),
6366 (v16i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)),
6370 // zext v1i16 -> v1i64
6371 def : Pat<(v1i64 (zext (v1i16 FPR16:$Rn))),
6372 (v1i64 (SUBREG_TO_REG (i64 0),
6374 (v8i16 (SUBREG_TO_REG (i64 0), $Rn, sub_16)),
6378 // sext v1i8 -> v1i32
6379 def : Pat<(v1i32 (sext (v1i8 FPR8:$Rn))),
6382 (v4i16 (SUBREG_TO_REG (i64 0),
6383 (v1i16 (EXTRACT_SUBREG
6385 (v8i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)), 0)),
6390 // sext v1i8 -> v1i64
6391 def : Pat<(v1i64 (sext (v1i8 FPR8:$Rn))),
6394 (v2i32 (SUBREG_TO_REG (i64 0),
6395 (v1i32 (EXTRACT_SUBREG
6397 (v4i16 (SUBREG_TO_REG (i64 0),
6398 (v1i16 (EXTRACT_SUBREG
6400 (v8i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)), 0)),
6408 // sext v1i16 -> v1i64
6409 def : Pat<(v1i64 (sext (v1i16 FPR16:$Rn))),
6412 (v2i32 (SUBREG_TO_REG (i64 0),
6413 (v1i32 (EXTRACT_SUBREG
6415 (v4i16 (SUBREG_TO_REG (i64 0), $Rn, sub_16)), 0)),
6420 //===----------------------------------------------------------------------===//
6421 // Non-Instruction Patterns
6422 //===----------------------------------------------------------------------===//
6424 // 64-bit vector bitcasts...
// All same-width bitconverts are register-class no-ops: the pattern result
// is just the source register reinterpreted, so no instruction is emitted.
6426 def : Pat<(v1i64 (bitconvert (v8i8 VPR64:$src))), (v1i64 VPR64:$src)>;
6427 def : Pat<(v2f32 (bitconvert (v8i8 VPR64:$src))), (v2f32 VPR64:$src)>;
6428 def : Pat<(v2i32 (bitconvert (v8i8 VPR64:$src))), (v2i32 VPR64:$src)>;
6429 def : Pat<(v4i16 (bitconvert (v8i8 VPR64:$src))), (v4i16 VPR64:$src)>;
6431 def : Pat<(v1i64 (bitconvert (v4i16 VPR64:$src))), (v1i64 VPR64:$src)>;
6432 def : Pat<(v2i32 (bitconvert (v4i16 VPR64:$src))), (v2i32 VPR64:$src)>;
6433 def : Pat<(v2f32 (bitconvert (v4i16 VPR64:$src))), (v2f32 VPR64:$src)>;
6434 def : Pat<(v8i8 (bitconvert (v4i16 VPR64:$src))), (v8i8 VPR64:$src)>;
6436 def : Pat<(v1i64 (bitconvert (v2i32 VPR64:$src))), (v1i64 VPR64:$src)>;
6437 def : Pat<(v2f32 (bitconvert (v2i32 VPR64:$src))), (v2f32 VPR64:$src)>;
6438 def : Pat<(v4i16 (bitconvert (v2i32 VPR64:$src))), (v4i16 VPR64:$src)>;
6439 def : Pat<(v8i8 (bitconvert (v2i32 VPR64:$src))), (v8i8 VPR64:$src)>;
6441 def : Pat<(v1i64 (bitconvert (v2f32 VPR64:$src))), (v1i64 VPR64:$src)>;
6442 def : Pat<(v2i32 (bitconvert (v2f32 VPR64:$src))), (v2i32 VPR64:$src)>;
6443 def : Pat<(v4i16 (bitconvert (v2f32 VPR64:$src))), (v4i16 VPR64:$src)>;
6444 def : Pat<(v8i8 (bitconvert (v2f32 VPR64:$src))), (v8i8 VPR64:$src)>;
6446 def : Pat<(v2f32 (bitconvert (v1i64 VPR64:$src))), (v2f32 VPR64:$src)>;
6447 def : Pat<(v2i32 (bitconvert (v1i64 VPR64:$src))), (v2i32 VPR64:$src)>;
6448 def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>;
6449 def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>;
6451 def : Pat<(v1i64 (bitconvert (v1f64 VPR64:$src))), (v1i64 VPR64:$src)>;
6452 def : Pat<(v2f32 (bitconvert (v1f64 VPR64:$src))), (v2f32 VPR64:$src)>;
6453 def : Pat<(v2i32 (bitconvert (v1f64 VPR64:$src))), (v2i32 VPR64:$src)>;
6454 def : Pat<(v4i16 (bitconvert (v1f64 VPR64:$src))), (v4i16 VPR64:$src)>;
6455 def : Pat<(v8i8 (bitconvert (v1f64 VPR64:$src))), (v8i8 VPR64:$src)>;
6456 def : Pat<(f64 (bitconvert (v1f64 VPR64:$src))), (f64 VPR64:$src)>;
6458 def : Pat<(v1f64 (bitconvert (v1i64 VPR64:$src))), (v1f64 VPR64:$src)>;
6459 def : Pat<(v1f64 (bitconvert (v2f32 VPR64:$src))), (v1f64 VPR64:$src)>;
6460 def : Pat<(v1f64 (bitconvert (v2i32 VPR64:$src))), (v1f64 VPR64:$src)>;
6461 def : Pat<(v1f64 (bitconvert (v4i16 VPR64:$src))), (v1f64 VPR64:$src)>;
6462 def : Pat<(v1f64 (bitconvert (v8i8 VPR64:$src))), (v1f64 VPR64:$src)>;
6463 def : Pat<(v1f64 (bitconvert (f64 VPR64:$src))), (v1f64 VPR64:$src)>;
6465 // ..and 128-bit vector bitcasts...
6467 def : Pat<(v2f64 (bitconvert (v16i8 VPR128:$src))), (v2f64 VPR128:$src)>;
6468 def : Pat<(v2i64 (bitconvert (v16i8 VPR128:$src))), (v2i64 VPR128:$src)>;
6469 def : Pat<(v4f32 (bitconvert (v16i8 VPR128:$src))), (v4f32 VPR128:$src)>;
6470 def : Pat<(v4i32 (bitconvert (v16i8 VPR128:$src))), (v4i32 VPR128:$src)>;
6471 def : Pat<(v8i16 (bitconvert (v16i8 VPR128:$src))), (v8i16 VPR128:$src)>;
6473 def : Pat<(v2f64 (bitconvert (v8i16 VPR128:$src))), (v2f64 VPR128:$src)>;
6474 def : Pat<(v2i64 (bitconvert (v8i16 VPR128:$src))), (v2i64 VPR128:$src)>;
6475 def : Pat<(v4i32 (bitconvert (v8i16 VPR128:$src))), (v4i32 VPR128:$src)>;
6476 def : Pat<(v4f32 (bitconvert (v8i16 VPR128:$src))), (v4f32 VPR128:$src)>;
6477 def : Pat<(v16i8 (bitconvert (v8i16 VPR128:$src))), (v16i8 VPR128:$src)>;
6479 def : Pat<(v2f64 (bitconvert (v4i32 VPR128:$src))), (v2f64 VPR128:$src)>;
6480 def : Pat<(v2i64 (bitconvert (v4i32 VPR128:$src))), (v2i64 VPR128:$src)>;
6481 def : Pat<(v4f32 (bitconvert (v4i32 VPR128:$src))), (v4f32 VPR128:$src)>;
6482 def : Pat<(v8i16 (bitconvert (v4i32 VPR128:$src))), (v8i16 VPR128:$src)>;
6483 def : Pat<(v16i8 (bitconvert (v4i32 VPR128:$src))), (v16i8 VPR128:$src)>;
6485 def : Pat<(v2f64 (bitconvert (v4f32 VPR128:$src))), (v2f64 VPR128:$src)>;
6486 def : Pat<(v2i64 (bitconvert (v4f32 VPR128:$src))), (v2i64 VPR128:$src)>;
6487 def : Pat<(v4i32 (bitconvert (v4f32 VPR128:$src))), (v4i32 VPR128:$src)>;
6488 def : Pat<(v8i16 (bitconvert (v4f32 VPR128:$src))), (v8i16 VPR128:$src)>;
6489 def : Pat<(v16i8 (bitconvert (v4f32 VPR128:$src))), (v16i8 VPR128:$src)>;
6491 def : Pat<(v2f64 (bitconvert (v2i64 VPR128:$src))), (v2f64 VPR128:$src)>;
6492 def : Pat<(v4f32 (bitconvert (v2i64 VPR128:$src))), (v4f32 VPR128:$src)>;
6493 def : Pat<(v4i32 (bitconvert (v2i64 VPR128:$src))), (v4i32 VPR128:$src)>;
6494 def : Pat<(v8i16 (bitconvert (v2i64 VPR128:$src))), (v8i16 VPR128:$src)>;
6495 def : Pat<(v16i8 (bitconvert (v2i64 VPR128:$src))), (v16i8 VPR128:$src)>;
6497 def : Pat<(v2i64 (bitconvert (v2f64 VPR128:$src))), (v2i64 VPR128:$src)>;
6498 def : Pat<(v4f32 (bitconvert (v2f64 VPR128:$src))), (v4f32 VPR128:$src)>;
6499 def : Pat<(v4i32 (bitconvert (v2f64 VPR128:$src))), (v4i32 VPR128:$src)>;
6500 def : Pat<(v8i16 (bitconvert (v2f64 VPR128:$src))), (v8i16 VPR128:$src)>;
6501 def : Pat<(v16i8 (bitconvert (v2f64 VPR128:$src))), (v16i8 VPR128:$src)>;
6503 // ...and scalar bitcasts...
6504 def : Pat<(f16 (bitconvert (v1i16 FPR16:$src))), (f16 FPR16:$src)>;
6505 def : Pat<(f32 (bitconvert (v1i32 FPR32:$src))), (f32 FPR32:$src)>;
6506 def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>;
6507 def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>;
// Crossing the FP/GPR register-bank boundary requires an actual FMOV.
6509 def : Pat<(i64 (bitconvert (v1i64 FPR64:$src))), (FMOVxd $src)>;
6510 def : Pat<(i64 (bitconvert (v1f64 FPR64:$src))), (FMOVxd $src)>;
6511 def : Pat<(i64 (bitconvert (v2i32 FPR64:$src))), (FMOVxd $src)>;
6512 def : Pat<(i64 (bitconvert (v2f32 FPR64:$src))), (FMOVxd $src)>;
6513 def : Pat<(i64 (bitconvert (v4i16 FPR64:$src))), (FMOVxd $src)>;
6514 def : Pat<(i64 (bitconvert (v8i8 FPR64:$src))), (FMOVxd $src)>;
6516 def : Pat<(i32 (bitconvert (v1i32 FPR32:$src))), (FMOVws $src)>;
6518 def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>;
6519 def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>;
6520 def : Pat<(v2i32 (bitconvert (v1i64 VPR64:$src))), (v2i32 VPR64:$src)>;
6522 def : Pat<(f64 (bitconvert (v8i8 VPR64:$src))), (f64 VPR64:$src)>;
6523 def : Pat<(f64 (bitconvert (v4i16 VPR64:$src))), (f64 VPR64:$src)>;
6524 def : Pat<(f64 (bitconvert (v2i32 VPR64:$src))), (f64 VPR64:$src)>;
6525 def : Pat<(f64 (bitconvert (v2f32 VPR64:$src))), (f64 VPR64:$src)>;
6526 def : Pat<(f64 (bitconvert (v1i64 VPR64:$src))), (f64 VPR64:$src)>;
6528 def : Pat<(f128 (bitconvert (v16i8 VPR128:$src))), (f128 VPR128:$src)>;
6529 def : Pat<(f128 (bitconvert (v8i16 VPR128:$src))), (f128 VPR128:$src)>;
6530 def : Pat<(f128 (bitconvert (v4i32 VPR128:$src))), (f128 VPR128:$src)>;
6531 def : Pat<(f128 (bitconvert (v2i64 VPR128:$src))), (f128 VPR128:$src)>;
6532 def : Pat<(f128 (bitconvert (v4f32 VPR128:$src))), (f128 VPR128:$src)>;
6533 def : Pat<(f128 (bitconvert (v2f64 VPR128:$src))), (f128 VPR128:$src)>;
6535 def : Pat<(v1i16 (bitconvert (f16 FPR16:$src))), (v1i16 FPR16:$src)>;
6536 def : Pat<(v1i32 (bitconvert (f32 FPR32:$src))), (v1i32 FPR32:$src)>;
6537 def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
6538 def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>;
// GPR -> FP/vector direction likewise needs FMOV (dx/sw variants).
6540 def : Pat<(v1i64 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
6541 def : Pat<(v1f64 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
6542 def : Pat<(v2i32 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
6543 def : Pat<(v2f32 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
6544 def : Pat<(v4i16 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
6545 def : Pat<(v8i8 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
6547 def : Pat<(v1i32 (bitconvert (i32 GPR32:$src))), (FMOVsw $src)>;
6549 def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
6550 def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
6551 def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
6552 def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
6553 def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
6555 def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>;
6556 def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), (v8i16 FPR128:$src)>;
6557 def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>;
6558 def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>;
6559 def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>;
6560 def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>;
6562 // Scalar Three Same
// Unsigned immediate operands for EXT indices, printed in hex:
// uimm3 (0-7) for the 8b form, uimm4 (0-15) for the 16b form.
6564 def neon_uimm3 : Operand<i64>,
6565 ImmLeaf<i64, [{return Imm < 8;}]> {
6566 let ParserMatchClass = uimm3_asmoperand;
6567 let PrintMethod = "printUImmHexOperand";
6570 def neon_uimm4 : Operand<i64>,
6571 ImmLeaf<i64, [{return Imm < 16;}]> {
6572 let ParserMatchClass = uimm4_asmoperand;
6573 let PrintMethod = "printUImmHexOperand";
// EXT (vector extract): concatenate $Rn:$Rm and extract a vector starting
// at byte $Index.
6577 class NeonI_Extract<bit q, bits<2> op2, string asmop,
6578 string OpS, RegisterOperand OpVPR, Operand OpImm>
6579 : NeonI_BitExtract<q, op2, (outs OpVPR:$Rd),
6580 (ins OpVPR:$Rn, OpVPR:$Rm, OpImm:$Index),
6581 asmop # "\t$Rd." # OpS # ", $Rn." # OpS #
6582 ", $Rm." # OpS # ", $Index",
// 8b form: imm4 bit 3 must be zero (index < 8).
6588 def EXTvvvi_8b : NeonI_Extract<0b0, 0b00, "ext", "8b",
6589 VPR64, neon_uimm3> {
6590 let Inst{14-11} = {0b0, Index{2}, Index{1}, Index{0}};
6593 def EXTvvvi_16b: NeonI_Extract<0b1, 0b00, "ext", "16b",
6594 VPR128, neon_uimm4> {
6595 let Inst{14-11} = Index;
// Map the target-specific Neon_vextract node onto EXT for every 64- and
// 128-bit element arrangement.
6598 class NI_Extract<ValueType OpTy, RegisterOperand OpVPR, Instruction INST,
6600 : Pat<(OpTy (Neon_vextract (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm),
6602 (INST OpVPR:$Rn, OpVPR:$Rm, OpImm:$Imm)>;
6604 def : NI_Extract<v8i8, VPR64, EXTvvvi_8b, neon_uimm3>;
6605 def : NI_Extract<v4i16, VPR64, EXTvvvi_8b, neon_uimm3>;
6606 def : NI_Extract<v2i32, VPR64, EXTvvvi_8b, neon_uimm3>;
6607 def : NI_Extract<v1i64, VPR64, EXTvvvi_8b, neon_uimm3>;
6608 def : NI_Extract<v2f32, VPR64, EXTvvvi_8b, neon_uimm3>;
6609 def : NI_Extract<v1f64, VPR64, EXTvvvi_8b, neon_uimm3>;
6610 def : NI_Extract<v16i8, VPR128, EXTvvvi_16b, neon_uimm4>;
6611 def : NI_Extract<v8i16, VPR128, EXTvvvi_16b, neon_uimm4>;
6612 def : NI_Extract<v4i32, VPR128, EXTvvvi_16b, neon_uimm4>;
6613 def : NI_Extract<v2i64, VPR128, EXTvvvi_16b, neon_uimm4>;
6614 def : NI_Extract<v4f32, VPR128, EXTvvvi_16b, neon_uimm4>;
6615 def : NI_Extract<v2f64, VPR128, EXTvvvi_16b, neon_uimm4>;
// TBL (table lookup): index vector $Rm selects bytes from the register list
// $Rn; out-of-range indices produce zero (no accumulator input).
6618 class NI_TBL<bit q, bits<2> op2, bits<2> len, bit op,
6619 string asmop, string OpS, RegisterOperand OpVPR,
6620 RegisterOperand VecList>
6621 : NeonI_TBL<q, op2, len, op,
6622 (outs OpVPR:$Rd), (ins VecList:$Rn, OpVPR:$Rm),
6623 asmop # "\t$Rd." # OpS # ", $Rn, $Rm." # OpS,
6627 // The vectors in look up table are always 16b
6628 multiclass NI_TBL_pat<bits<2> len, bit op, string asmop, string List> {
6629 def _8b : NI_TBL<0, 0b00, len, op, asmop, "8b", VPR64,
6630 !cast<RegisterOperand>(List # "16B_operand")>;
6632 def _16b : NI_TBL<1, 0b00, len, op, asmop, "16b", VPR128,
6633 !cast<RegisterOperand>(List # "16B_operand")>;
// len encodes the table size: 1, 2, 3 or 4 consecutive 16B registers.
6636 defm TBL1 : NI_TBL_pat<0b00, 0b0, "tbl", "VOne">;
6637 defm TBL2 : NI_TBL_pat<0b01, 0b0, "tbl", "VPair">;
6638 defm TBL3 : NI_TBL_pat<0b10, 0b0, "tbl", "VTriple">;
6639 defm TBL4 : NI_TBL_pat<0b11, 0b0, "tbl", "VQuad">;
6641 // Table lookup extension
// TBX is TBL with an accumulator: $src is tied to $Rd, and (per the ISA)
// lanes whose index is out of range keep their previous $Rd contents.
6642 class NI_TBX<bit q, bits<2> op2, bits<2> len, bit op,
6643 string asmop, string OpS, RegisterOperand OpVPR,
6644 RegisterOperand VecList>
6645 : NeonI_TBL<q, op2, len, op,
6646 (outs OpVPR:$Rd), (ins OpVPR:$src, VecList:$Rn, OpVPR:$Rm),
6647 asmop # "\t$Rd." # OpS # ", $Rn, $Rm." # OpS,
6650 let Constraints = "$src = $Rd";
6653 // The vectors in look up table are always 16b
6654 multiclass NI_TBX_pat<bits<2> len, bit op, string asmop, string List> {
6655 def _8b : NI_TBX<0, 0b00, len, op, asmop, "8b", VPR64,
6656 !cast<RegisterOperand>(List # "16B_operand")>;
6658 def _16b : NI_TBX<1, 0b00, len, op, asmop, "16b", VPR128,
6659 !cast<RegisterOperand>(List # "16B_operand")>;
// len encodes the table size: 1, 2, 3 or 4 consecutive 16B registers.
6662 defm TBX1 : NI_TBX_pat<0b00, 0b1, "tbx", "VOne">;
6663 defm TBX2 : NI_TBX_pat<0b01, 0b1, "tbx", "VPair">;
6664 defm TBX3 : NI_TBX_pat<0b10, 0b1, "tbx", "VTriple">;
6665 defm TBX4 : NI_TBX_pat<0b11, 0b1, "tbx", "VQuad">;
// INS (general): insert a GPR value into lane $Imm of vector $Rd.
// $src is tied to $Rd so the untouched lanes are preserved.
6667 class NeonI_INS_main<string asmop, string Res, ValueType ResTy,
6668 RegisterClass OpGPR, ValueType OpTy, Operand OpImm>
6669 : NeonI_copy<0b1, 0b0, 0b0011,
6670 (outs VPR128:$Rd), (ins VPR128:$src, OpGPR:$Rn, OpImm:$Imm),
6671 asmop # "\t$Rd." # Res # "[$Imm], $Rn",
6672 [(set (ResTy VPR128:$Rd),
6673 (ResTy (vector_insert
6674 (ResTy VPR128:$src),
6679 let Constraints = "$src = $Rd";
6682 //Insert element (vector, from main)
// imm5 encodes element size / lane index exactly as for scalar DUP.
6683 def INSbw : NeonI_INS_main<"ins", "b", v16i8, GPR32, i32,
6685 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
6687 def INShw : NeonI_INS_main<"ins", "h", v8i16, GPR32, i32,
6689 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
6691 def INSsw : NeonI_INS_main<"ins", "s", v4i32, GPR32, i32,
6693 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
6695 def INSdx : NeonI_INS_main<"ins", "d", v2i64, GPR64, i64,
6697 let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
// "mov Vd.<t>[i], Rn" parse-only aliases for INS (general).
6700 def : NeonInstAlias<"mov $Rd.b[$Imm], $Rn",
6701 (INSbw VPR128:$Rd, GPR32:$Rn, neon_uimm4_bare:$Imm), 0>;
6702 def : NeonInstAlias<"mov $Rd.h[$Imm], $Rn",
6703 (INShw VPR128:$Rd, GPR32:$Rn, neon_uimm3_bare:$Imm), 0>;
6704 def : NeonInstAlias<"mov $Rd.s[$Imm], $Rn",
6705 (INSsw VPR128:$Rd, GPR32:$Rn, neon_uimm2_bare:$Imm), 0>;
6706 def : NeonInstAlias<"mov $Rd.d[$Imm], $Rn",
6707 (INSdx VPR128:$Rd, GPR64:$Rn, neon_uimm1_bare:$Imm), 0>;
// vector_insert into a 64-bit vector: widen to 128 bits, INS, then take the
// low half back out.
6709 class Neon_INS_main_pattern <ValueType ResTy,ValueType ExtResTy,
6710 RegisterClass OpGPR, ValueType OpTy,
6711 Operand OpImm, Instruction INS>
6712 : Pat<(ResTy (vector_insert
6716 (ResTy (EXTRACT_SUBREG
6717 (ExtResTy (INS (ExtResTy (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
6718 OpGPR:$Rn, OpImm:$Imm)), sub_64))>;
6720 def INSbw_pattern : Neon_INS_main_pattern<v8i8, v16i8, GPR32, i32,
6721 neon_uimm3_bare, INSbw>;
6722 def INShw_pattern : Neon_INS_main_pattern<v4i16, v8i16, GPR32, i32,
6723 neon_uimm2_bare, INShw>;
6724 def INSsw_pattern : Neon_INS_main_pattern<v2i32, v4i32, GPR32, i32,
6725 neon_uimm1_bare, INSsw>;
6726 def INSdx_pattern : Neon_INS_main_pattern<v1i64, v2i64, GPR64, i64,
6727 neon_uimm0_bare, INSdx>;
// INS (element): copy lane $Immn of $Rn into lane $Immd of $Rd.
// $src is tied to $Rd so the remaining lanes are preserved.
6729 class NeonI_INS_element<string asmop, string Res, Operand ResImm>
6730 : NeonI_insert<0b1, 0b1,
6731 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn,
6732 ResImm:$Immd, ResImm:$Immn),
6733 asmop # "\t$Rd." # Res # "[$Immd], $Rn." # Res # "[$Immn]",
6736 let Constraints = "$src = $Rd";
6741 //Insert element (vector, from element)
// imm5 (Inst{20-16}) holds size+destination lane, imm4 (Inst{14-11}) the
// source lane, per the bit assignments below.
6742 def INSELb : NeonI_INS_element<"ins", "b", neon_uimm4_bare> {
6743 let Inst{20-16} = {Immd{3}, Immd{2}, Immd{1}, Immd{0}, 0b1};
6744 let Inst{14-11} = {Immn{3}, Immn{2}, Immn{1}, Immn{0}};
6746 def INSELh : NeonI_INS_element<"ins", "h", neon_uimm3_bare> {
6747 let Inst{20-16} = {Immd{2}, Immd{1}, Immd{0}, 0b1, 0b0};
6748 let Inst{14-11} = {Immn{2}, Immn{1}, Immn{0}, 0b0};
6749 // bit 11 is unspecified, but should be set to zero.
6751 def INSELs : NeonI_INS_element<"ins", "s", neon_uimm2_bare> {
6752 let Inst{20-16} = {Immd{1}, Immd{0}, 0b1, 0b0, 0b0};
6753 let Inst{14-11} = {Immn{1}, Immn{0}, 0b0, 0b0};
6754 // bits 11-12 are unspecified, but should be set to zero.
6756 def INSELd : NeonI_INS_element<"ins", "d", neon_uimm1_bare> {
6757 let Inst{20-16} = {Immd, 0b1, 0b0, 0b0, 0b0};
6758 let Inst{14-11} = {Immn{0}, 0b0, 0b0, 0b0};
6759 // bits 11-13 are unspecified, but should be set to zero.
// "mov Vd.<t>[i], Vn.<t>[j]" parse-only aliases for INS (element).
6762 def : NeonInstAlias<"mov $Rd.b[$Immd], $Rn.b[$Immn]",
6763 (INSELb VPR128:$Rd, VPR128:$Rn,
6764 neon_uimm4_bare:$Immd, neon_uimm4_bare:$Immn), 0>;
6765 def : NeonInstAlias<"mov $Rd.h[$Immd], $Rn.h[$Immn]",
6766 (INSELh VPR128:$Rd, VPR128:$Rn,
6767 neon_uimm3_bare:$Immd, neon_uimm3_bare:$Immn), 0>;
6768 def : NeonInstAlias<"mov $Rd.s[$Immd], $Rn.s[$Immn]",
6769 (INSELs VPR128:$Rd, VPR128:$Rn,
6770 neon_uimm2_bare:$Immd, neon_uimm2_bare:$Immn), 0>;
6771 def : NeonInstAlias<"mov $Rd.d[$Immd], $Rn.d[$Immn]",
6772 (INSELd VPR128:$Rd, VPR128:$Rn,
6773 neon_uimm1_bare:$Immd, neon_uimm1_bare:$Immn), 0>;
// Select insert-of-extracted-lane as INS (element), covering all four
// combinations of 128-bit (ResTy) and 64-bit (NaTy) source/destination;
// 64-bit operands are widened via SUBREG_TO_REG and narrowed back with
// EXTRACT_SUBREG around the 128-bit-only INSEL instruction.
6775 multiclass Neon_INS_elt_pattern<ValueType ResTy, ValueType NaTy,
6776 ValueType MidTy, Operand StImm, Operand NaImm,
6778 def : Pat<(ResTy (vector_insert
6779 (ResTy VPR128:$src),
6780 (MidTy (vector_extract
6784 (INS (ResTy VPR128:$src), (ResTy VPR128:$Rn),
6785 StImm:$Immd, StImm:$Immn)>;
6787 def : Pat <(ResTy (vector_insert
6788 (ResTy VPR128:$src),
6789 (MidTy (vector_extract
6793 (INS (ResTy VPR128:$src),
6794 (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$Rn), sub_64)),
6795 StImm:$Immd, NaImm:$Immn)>;
6797 def : Pat <(NaTy (vector_insert
6799 (MidTy (vector_extract
6803 (NaTy (EXTRACT_SUBREG
6805 (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)),
6807 NaImm:$Immd, StImm:$Immn)),
6810 def : Pat <(NaTy (vector_insert
6812 (MidTy (vector_extract
6816 (NaTy (EXTRACT_SUBREG
6818 (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)),
6819 (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$Rn), sub_64)),
6820 NaImm:$Immd, NaImm:$Immn)),
// One instantiation per element type, FP and integer.
6824 defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, neon_uimm2_bare,
6825 neon_uimm1_bare, INSELs>;
6826 defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, neon_uimm1_bare,
6827 neon_uimm0_bare, INSELd>;
6828 defm : Neon_INS_elt_pattern<v16i8, v8i8, i32, neon_uimm4_bare,
6829 neon_uimm3_bare, INSELb>;
6830 defm : Neon_INS_elt_pattern<v8i16, v4i16, i32, neon_uimm3_bare,
6831 neon_uimm2_bare, INSELh>;
6832 defm : Neon_INS_elt_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
6833 neon_uimm1_bare, INSELs>;
6834 defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, neon_uimm1_bare,
6835 neon_uimm0_bare, INSELd>;
// Insert an FP scalar register into a vector lane: view the FPR as lane 0
// of a 128-bit vector (SUBREG_TO_REG) and use INS (element).  Second
// pattern handles a 64-bit destination via widen/insert/narrow.
6837 multiclass Neon_INS_elt_float_pattern<ValueType ResTy, ValueType NaTy,
6839 RegisterClass OpFPR, Operand ResImm,
6840 SubRegIndex SubIndex, Instruction INS> {
6841 def : Pat <(ResTy (vector_insert
6842 (ResTy VPR128:$src),
6845 (INS (ResTy VPR128:$src),
6846 (ResTy (SUBREG_TO_REG (i64 0), OpFPR:$Rn, SubIndex)),
6850 def : Pat <(NaTy (vector_insert
6854 (NaTy (EXTRACT_SUBREG
6856 (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)),
6857 (ResTy (SUBREG_TO_REG (i64 0), (MidTy OpFPR:$Rn), SubIndex)),
6863 defm : Neon_INS_elt_float_pattern<v4f32, v2f32, f32, FPR32, neon_uimm2_bare,
6865 defm : Neon_INS_elt_float_pattern<v2f64, v1f64, f64, FPR64, neon_uimm1_bare,
// SMOV: sign-extending move of vector lane $Imm into a general-purpose
// register (w forms extend to 32 bits, x forms to 64 bits via the Q bit).
6868 class NeonI_SMOV<string asmop, string Res, bit Q,
6869 ValueType OpTy, ValueType eleTy,
6870 Operand OpImm, RegisterClass ResGPR, ValueType ResTy>
6871 : NeonI_copy<Q, 0b0, 0b0101,
6872 (outs ResGPR:$Rd), (ins VPR128:$Rn, OpImm:$Imm),
6873 asmop # "\t$Rd, $Rn." # Res # "[$Imm]",
6874 [(set (ResTy ResGPR:$Rd),
6876 (ResTy (vector_extract
6877 (OpTy VPR128:$Rn), (OpImm:$Imm))),
6883 //Signed integer move (main, from element)
// imm5 lane/size encoding matches DUP/INS; no SMOVws - a 32-bit lane to
// 32-bit GPR needs no sign extension.
6884 def SMOVwb : NeonI_SMOV<"smov", "b", 0b0, v16i8, i8, neon_uimm4_bare,
6886 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
6888 def SMOVwh : NeonI_SMOV<"smov", "h", 0b0, v8i16, i16, neon_uimm3_bare,
6890 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
6892 def SMOVxb : NeonI_SMOV<"smov", "b", 0b1, v16i8, i8, neon_uimm4_bare,
6894 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
6896 def SMOVxh : NeonI_SMOV<"smov", "h", 0b1, v8i16, i16, neon_uimm3_bare,
6898 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
6900 def SMOVxs : NeonI_SMOV<"smov", "s", 0b1, v4i32, i32, neon_uimm2_bare,
6902 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
// Neon_SMOVx_pattern: select SMOVx* for sign-extending element extracts to
// i64, covering sext/sext_inreg of extracts from both the 128-bit (StTy)
// and the narrow 64-bit (NaTy) vector; narrow sources are first widened
// with SUBREG_TO_REG so the 128-bit SMOV instruction applies.
// NOTE(review): numbering gaps show several pattern lines are missing from
// this excerpt; the dag shapes below are incomplete as displayed.
6905 multiclass Neon_SMOVx_pattern <ValueType StTy, ValueType NaTy,
6906 ValueType eleTy, Operand StImm, Operand NaImm,
6907 Instruction SMOVI> {
6908 def : Pat<(i64 (sext_inreg
6910 (i32 (vector_extract
6911 (StTy VPR128:$Rn), (StImm:$Imm))))),
6913 (SMOVI VPR128:$Rn, StImm:$Imm)>;
6915 def : Pat<(i64 (sext
6916 (i32 (vector_extract
6917 (StTy VPR128:$Rn), (StImm:$Imm))))),
6918 (SMOVI VPR128:$Rn, StImm:$Imm)>;
6920 def : Pat<(i64 (sext_inreg
6921 (i64 (vector_extract
6922 (NaTy VPR64:$Rn), (NaImm:$Imm))),
6924 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
6927 def : Pat<(i64 (sext_inreg
6929 (i32 (vector_extract
6930 (NaTy VPR64:$Rn), (NaImm:$Imm))))),
6932 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
6935 def : Pat<(i64 (sext
6936 (i32 (vector_extract
6937 (NaTy VPR64:$Rn), (NaImm:$Imm))))),
6938 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
// Instantiations for b/h/s element sizes (x-form SMOV).
6942 defm : Neon_SMOVx_pattern<v16i8, v8i8, i8, neon_uimm4_bare,
6943 neon_uimm3_bare, SMOVxb>;
6944 defm : Neon_SMOVx_pattern<v8i16, v4i16, i16, neon_uimm3_bare,
6945 neon_uimm2_bare, SMOVxh>;
6946 defm : Neon_SMOVx_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
6947 neon_uimm1_bare, SMOVxs>;
// Neon_SMOVw_pattern: i32 sext_inreg of an extract from a narrow vector,
// selected to the w-form SMOV after widening the source.
6949 class Neon_SMOVw_pattern <ValueType StTy, ValueType NaTy,
6950 ValueType eleTy, Operand StImm, Operand NaImm,
6952 : Pat<(i32 (sext_inreg
6953 (i32 (vector_extract
6954 (NaTy VPR64:$Rn), (NaImm:$Imm))),
6956 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
6959 def : Neon_SMOVw_pattern<v16i8, v8i8, i8, neon_uimm4_bare,
6960 neon_uimm3_bare, SMOVwb>;
6961 def : Neon_SMOVw_pattern<v8i16, v4i16, i16, neon_uimm3_bare,
6962 neon_uimm2_bare, SMOVwh>;
// NeonI_UMOV: unsigned move from vector element to general register (UMOV).
// Same encoding scheme as NeonI_SMOV (lane index folded into Inst{20-16})
// but opcode 0b0111 and no sign extension implied by the patterns.
// NOTE(review): numbering gaps — some def bodies are incomplete as shown.
6964 class NeonI_UMOV<string asmop, string Res, bit Q,
6965 ValueType OpTy, Operand OpImm,
6966 RegisterClass ResGPR, ValueType ResTy>
6967 : NeonI_copy<Q, 0b0, 0b0111,
6968 (outs ResGPR:$Rd), (ins VPR128:$Rn, OpImm:$Imm),
6969 asmop # "\t$Rd, $Rn." # Res # "[$Imm]",
6970 [(set (ResTy ResGPR:$Rd),
6971 (ResTy (vector_extract
6972 (OpTy VPR128:$Rn), (OpImm:$Imm))))],
6977 //Unsigned integer move (main, from element)
6978 def UMOVwb : NeonI_UMOV<"umov", "b", 0b0, v16i8, neon_uimm4_bare,
6980 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
6982 def UMOVwh : NeonI_UMOV<"umov", "h", 0b0, v8i16, neon_uimm3_bare,
6984 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
6986 def UMOVws : NeonI_UMOV<"umov", "s", 0b0, v4i32, neon_uimm2_bare,
6988 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
6990 def UMOVxd : NeonI_UMOV<"umov", "d", 0b1, v2i64, neon_uimm1_bare,
6992 let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
// Assembler aliases: "mov Rd, Vn.s[i]" / "mov Xd, Vn.d[i]" print as UMOV.
6995 def : NeonInstAlias<"mov $Rd, $Rn.s[$Imm]",
6996 (UMOVws GPR32:$Rd, VPR128:$Rn, neon_uimm2_bare:$Imm), 0>;
6997 def : NeonInstAlias<"mov $Rd, $Rn.d[$Imm]",
6998 (UMOVxd GPR64:$Rd, VPR128:$Rn, neon_uimm1_bare:$Imm), 0>;
// Neon_UMOV_pattern: plain vector_extract from a narrow (64-bit) vector,
// widened via SUBREG_TO_REG and selected to the matching UMOV.
// NOTE(review): numbering gaps — class body is incomplete as shown.
7000 class Neon_UMOV_pattern <ValueType StTy, ValueType NaTy, ValueType ResTy,
7001 Operand StImm, Operand NaImm,
7003 : Pat<(ResTy (vector_extract
7004 (NaTy VPR64:$Rn), NaImm:$Imm)),
7005 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
7008 def : Neon_UMOV_pattern<v16i8, v8i8, i32, neon_uimm4_bare,
7009 neon_uimm3_bare, UMOVwb>;
7010 def : Neon_UMOV_pattern<v8i16, v4i16, i32, neon_uimm3_bare,
7011 neon_uimm2_bare, UMOVwh>;
7012 def : Neon_UMOV_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
7013 neon_uimm1_bare, UMOVws>;
// Extracts (with surrounding zext, per the visible i64/zext forms below)
// from 128-bit vectors, selected directly to the w/x UMOV forms.
7016 (i32 (vector_extract
7017 (v16i8 VPR128:$Rn), (neon_uimm4_bare:$Imm))),
7019 (UMOVwb VPR128:$Rn, neon_uimm4_bare:$Imm)>;
7022 (i32 (vector_extract
7023 (v8i16 VPR128:$Rn), (neon_uimm3_bare:$Imm))),
7025 (UMOVwh VPR128:$Rn, neon_uimm3_bare:$Imm)>;
7027 def : Pat<(i64 (zext
7028 (i32 (vector_extract
7029 (v2i64 VPR128:$Rn), (neon_uimm1_bare:$Imm))))),
7030 (UMOVxd VPR128:$Rn, neon_uimm1_bare:$Imm)>;
// Same shapes for narrow 64-bit sources: widen first, then UMOV.
7033 (i32 (vector_extract
7034 (v8i8 VPR64:$Rn), (neon_uimm3_bare:$Imm))),
7036 (UMOVwb (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
7037 neon_uimm3_bare:$Imm)>;
7040 (i32 (vector_extract
7041 (v4i16 VPR64:$Rn), (neon_uimm2_bare:$Imm))),
7043 (UMOVwh (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
7044 neon_uimm2_bare:$Imm)>;
7046 def : Pat<(i64 (zext
7047 (i32 (vector_extract
7048 (v1i64 VPR64:$Rn), (neon_uimm0_bare:$Imm))))),
7049 (UMOVxd (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
7050 neon_uimm0_bare:$Imm)>;
7052 // Additional copy patterns for scalar types
// Extract lane 0 of a one-element vector held in an FP register:
// b/h sizes go through a widening SUBREG_TO_REG (target instruction
// partly elided in this excerpt); s/d sizes are plain FMOV to GPR.
7053 def : Pat<(i32 (vector_extract (v1i8 FPR8:$Rn), (i64 0))),
7055 (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8)), (i64 0))>;
7057 def : Pat<(i32 (vector_extract (v1i16 FPR16:$Rn), (i64 0))),
7059 (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16)), (i64 0))>;
7061 def : Pat<(i32 (vector_extract (v1i32 FPR32:$Rn), (i64 0))),
7062 (FMOVws FPR32:$Rn)>;
7064 def : Pat<(i64 (vector_extract (v1i64 FPR64:$Rn), (i64 0))),
7065 (FMOVxd FPR64:$Rn)>;
7067 def : Pat<(f64 (vector_extract (v1f64 FPR64:$Rn), (i64 0))),
// scalar_to_vector: materialize a GPR scalar into lane 0 of a vector.
// Narrow results insert into an IMPLICIT_DEF 128-bit vector via INS*,
// then EXTRACT_SUBREG down to the result width.
7070 def : Pat<(v1i8 (scalar_to_vector GPR32:$Rn)),
7071 (v1i8 (EXTRACT_SUBREG (v16i8
7072 (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))),
7075 def : Pat<(v1i16 (scalar_to_vector GPR32:$Rn)),
7076 (v1i16 (EXTRACT_SUBREG (v8i16
7077 (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))),
7080 def : Pat<(v1i32 (scalar_to_vector GPR32:$src)),
7083 def : Pat<(v1i64 (scalar_to_vector GPR64:$src)),
7086 def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)),
7087 (v8i8 (EXTRACT_SUBREG (v16i8
7088 (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))),
7091 def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)),
7092 (v4i16 (EXTRACT_SUBREG (v8i16
7093 (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))),
// NOTE(review): the v2i32 pattern casts the INSsw result through v16i8
// before EXTRACT_SUBREG, unlike its siblings which use the natural type —
// looks inconsistent; confirm against the full file before changing.
7096 def : Pat<(v2i32 (scalar_to_vector GPR32:$Rn)),
7097 (v2i32 (EXTRACT_SUBREG (v16i8
7098 (INSsw (v4i32 (IMPLICIT_DEF)), $Rn, (i64 0))),
// 128-bit results need no subregister extraction.
7101 def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)),
7102 (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))>;
7104 def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)),
7105 (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))>;
7107 def : Pat<(v4i32 (scalar_to_vector GPR32:$Rn)),
7108 (INSsw (v4i32 (IMPLICIT_DEF)), $Rn, (i64 0))>;
7110 def : Pat<(v2i64 (scalar_to_vector GPR64:$Rn)),
7111 (INSdx (v2i64 (IMPLICIT_DEF)), $Rn, (i64 0))>;
// FP scalars are already in vector registers; only a subreg cast is needed.
7113 def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))),
7114 (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32)>;
7115 def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))),
7116 (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32)>;
7118 def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Rn))),
7121 def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$src))),
7122 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)),
7123 (f64 FPR64:$src), sub_64)>;
// NeonI_DUP_Elt: DUP (element) — broadcast lane $Imm of $Rn into every
// lane of $Rd. Lane index is folded into Inst{20-16} per element size.
// NOTE(review): numbering gaps — class and def bodies are incomplete here.
7125 class NeonI_DUP_Elt<bit Q, string asmop, string rdlane, string rnlane,
7126 RegisterOperand ResVPR, Operand OpImm>
7127 : NeonI_copy<Q, 0b0, 0b0000, (outs ResVPR:$Rd),
7128 (ins VPR128:$Rn, OpImm:$Imm),
7129 asmop # "\t$Rd" # rdlane # ", $Rn" # rnlane # "[$Imm]",
// 128-bit destination forms (Q = 1).
7135 def DUPELT16b : NeonI_DUP_Elt<0b1, "dup", ".16b", ".b", VPR128,
7137 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
7140 def DUPELT8h : NeonI_DUP_Elt<0b1, "dup", ".8h", ".h", VPR128,
7142 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
7145 def DUPELT4s : NeonI_DUP_Elt<0b1, "dup", ".4s", ".s", VPR128,
7147 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
7150 def DUPELT2d : NeonI_DUP_Elt<0b1, "dup", ".2d", ".d", VPR128,
7152 let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
// 64-bit destination forms (Q = 0).
7155 def DUPELT8b : NeonI_DUP_Elt<0b0, "dup", ".8b", ".b", VPR64,
7157 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
7160 def DUPELT4h : NeonI_DUP_Elt<0b0, "dup", ".4h", ".h", VPR64,
7162 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
7165 def DUPELT2s : NeonI_DUP_Elt<0b0, "dup", ".2s", ".s", VPR64,
7167 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
// Neon_vduplane selection: direct for 128-bit sources; narrow sources are
// widened with SUBREG_TO_REG first (lane immediate re-sized via OpNImm).
7170 multiclass NeonI_DUP_Elt_pattern<Instruction DUPELT, ValueType ResTy,
7171 ValueType OpTy,ValueType NaTy,
7172 ValueType ExTy, Operand OpLImm,
7174 def : Pat<(ResTy (Neon_vduplane (OpTy VPR128:$Rn), OpLImm:$Imm)),
7175 (ResTy (DUPELT (OpTy VPR128:$Rn), OpLImm:$Imm))>;
7177 def : Pat<(ResTy (Neon_vduplane
7178 (NaTy VPR64:$Rn), OpNImm:$Imm)),
7180 (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), OpNImm:$Imm))>;
7182 defm : NeonI_DUP_Elt_pattern<DUPELT16b, v16i8, v16i8, v8i8, v16i8,
7183 neon_uimm4_bare, neon_uimm3_bare>;
7184 defm : NeonI_DUP_Elt_pattern<DUPELT8b, v8i8, v16i8, v8i8, v16i8,
7185 neon_uimm4_bare, neon_uimm3_bare>;
7186 defm : NeonI_DUP_Elt_pattern<DUPELT8h, v8i16, v8i16, v4i16, v8i16,
7187 neon_uimm3_bare, neon_uimm2_bare>;
7188 defm : NeonI_DUP_Elt_pattern<DUPELT4h, v4i16, v8i16, v4i16, v8i16,
7189 neon_uimm3_bare, neon_uimm2_bare>;
7190 defm : NeonI_DUP_Elt_pattern<DUPELT4s, v4i32, v4i32, v2i32, v4i32,
7191 neon_uimm2_bare, neon_uimm1_bare>;
7192 defm : NeonI_DUP_Elt_pattern<DUPELT2s, v2i32, v4i32, v2i32, v4i32,
7193 neon_uimm2_bare, neon_uimm1_bare>;
7194 defm : NeonI_DUP_Elt_pattern<DUPELT2d, v2i64, v2i64, v1i64, v2i64,
7195 neon_uimm1_bare, neon_uimm0_bare>;
7196 defm : NeonI_DUP_Elt_pattern<DUPELT4s, v4f32, v4f32, v2f32, v4f32,
7197 neon_uimm2_bare, neon_uimm1_bare>;
7198 defm : NeonI_DUP_Elt_pattern<DUPELT2s, v2f32, v4f32, v2f32, v4f32,
7199 neon_uimm2_bare, neon_uimm1_bare>;
7200 defm : NeonI_DUP_Elt_pattern<DUPELT2d, v2f64, v2f64, v1f64, v2f64,
7201 neon_uimm1_bare, neon_uimm0_bare>;
// Neon_vdup of an FP scalar: widen the scalar register into a vector and
// broadcast lane 0 (selected instruction elided by missing lines here).
7203 def : Pat<(v2f32 (Neon_vdup (f32 FPR32:$Rn))),
7205 (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
7207 def : Pat<(v4f32 (Neon_vdup (f32 FPR32:$Rn))),
7209 (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
7211 def : Pat<(v2f64 (Neon_vdup (f64 FPR64:$Rn))),
7213 (SUBREG_TO_REG (i64 0), FPR64:$Rn, sub_64),
// NeonI_DUP_pattern: Neon_vduplane whose source is a one-element vector in
// an FP scalar register — widen through SUBREG_TO_REG, then DUP (element).
7216 multiclass NeonI_DUP_pattern<Instruction DUPELT, ValueType ResTy,
7217 ValueType OpTy, RegisterClass OpRC,
7218 Operand OpNImm, SubRegIndex SubIndex> {
7219 def : Pat<(ResTy (Neon_vduplane (OpTy OpRC:$Rn), OpNImm:$Imm)),
7221 (SUBREG_TO_REG (i64 0), OpRC:$Rn, SubIndex), OpNImm:$Imm))>;
7224 defm : NeonI_DUP_pattern<DUPELT4h, v4i16, v1i16, FPR16, neon_uimm2_bare,sub_16>;
7225 defm : NeonI_DUP_pattern<DUPELT4s, v4i32, v1i32, FPR32, neon_uimm2_bare,sub_32>;
7226 defm : NeonI_DUP_pattern<DUPELT8b, v8i8, v1i8, FPR8, neon_uimm3_bare, sub_8>;
7227 defm : NeonI_DUP_pattern<DUPELT8h, v8i16, v1i16, FPR16, neon_uimm3_bare,sub_16>;
7228 defm : NeonI_DUP_pattern<DUPELT16b, v16i8, v1i8, FPR8, neon_uimm4_bare, sub_8>;
// NeonI_DUP: DUP (general) — broadcast a GPR scalar into every vector lane.
// Inst{20-16} is the imm5 element-size tag (no lane index for this form).
// NOTE(review): numbering gaps — class and def bodies are incomplete here.
7230 class NeonI_DUP<bit Q, string asmop, string rdlane,
7231 RegisterOperand ResVPR, ValueType ResTy,
7232 RegisterClass OpGPR, ValueType OpTy>
7233 : NeonI_copy<Q, 0b0, 0b0001, (outs ResVPR:$Rd), (ins OpGPR:$Rn),
7234 asmop # "\t$Rd" # rdlane # ", $Rn",
7235 [(set (ResTy ResVPR:$Rd),
7236 (ResTy (Neon_vdup (OpTy OpGPR:$Rn))))],
7239 def DUP16b : NeonI_DUP<0b1, "dup", ".16b", VPR128, v16i8, GPR32, i32> {
7240 let Inst{20-16} = 0b00001;
7241 // bits 17-20 are unspecified, but should be set to zero.
7244 def DUP8h : NeonI_DUP<0b1, "dup", ".8h", VPR128, v8i16, GPR32, i32> {
7245 let Inst{20-16} = 0b00010;
7246 // bits 18-20 are unspecified, but should be set to zero.
7249 def DUP4s : NeonI_DUP<0b1, "dup", ".4s", VPR128, v4i32, GPR32, i32> {
7250 let Inst{20-16} = 0b00100;
7251 // bits 19-20 are unspecified, but should be set to zero.
7254 def DUP2d : NeonI_DUP<0b1, "dup", ".2d", VPR128, v2i64, GPR64, i64> {
7255 let Inst{20-16} = 0b01000;
7256 // bit 20 is unspecified, but should be set to zero.
7259 def DUP8b : NeonI_DUP<0b0, "dup", ".8b", VPR64, v8i8, GPR32, i32> {
7260 let Inst{20-16} = 0b00001;
7261 // bits 17-20 are unspecified, but should be set to zero.
7264 def DUP4h : NeonI_DUP<0b0, "dup", ".4h", VPR64, v4i16, GPR32, i32> {
7265 let Inst{20-16} = 0b00010;
7266 // bits 18-20 are unspecified, but should be set to zero.
7269 def DUP2s : NeonI_DUP<0b0, "dup", ".2s", VPR64, v2i32, GPR32, i32> {
7270 let Inst{20-16} = 0b00100;
7271 // bits 19-20 are unspecified, but should be set to zero.
7274 // patterns for CONCAT_VECTORS
// Concat of a 64-bit vector with undef is just a subreg cast to 128 bits;
// concat of two distinct halves widens both and combines them (combining
// instruction elided by missing lines); concat of a value with itself is
// handled separately (likely a broadcast form — confirm in full file).
7275 multiclass Concat_Vector_Pattern<ValueType ResTy, ValueType OpTy> {
7276 def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), undef)),
7277 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)>;
7278 def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))),
7280 (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
7281 (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rm, sub_64)),
7284 def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), (OpTy VPR64:$Rn))),
7286 (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
7290 defm : Concat_Vector_Pattern<v16i8, v8i8>;
7291 defm : Concat_Vector_Pattern<v8i16, v4i16>;
7292 defm : Concat_Vector_Pattern<v4i32, v2i32>;
7293 defm : Concat_Vector_Pattern<v2i64, v1i64>;
7294 defm : Concat_Vector_Pattern<v4f32, v2f32>;
7295 defm : Concat_Vector_Pattern<v2f64, v1f64>;
// v1i32 sources live in FPR32; widen through sub_32 instead of sub_64.
7297 def : Pat<(v2i32 (concat_vectors (v1i32 FPR32:$Rn), undef)),
7298 (v2i32 (SUBREG_TO_REG(i64 0), $Rn, sub_32))>;
7299 def : Pat<(v2i32 (concat_vectors (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
7302 (v4i32 (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32)),
7303 (v4i32 (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32)),
// Self-concat of a v1i32 broadcasts lane 0 with DUPELT2s.
7307 def : Pat<(v2i32 (concat_vectors (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rn))),
7308 (DUPELT2s (v4i32 (SUBREG_TO_REG(i64 0), $Rn, sub_32)), 0)>;
7310 //patterns for EXTRACT_SUBVECTOR
// Taking the low half (index 0) of a 128-bit vector is free: it is simply
// the sub_64 subregister of the source, one pattern per element type.
7311 def : Pat<(v8i8 (extract_subvector (v16i8 VPR128:$Rn), (i64 0))),
7312 (v8i8 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
7313 def : Pat<(v4i16 (extract_subvector (v8i16 VPR128:$Rn), (i64 0))),
7314 (v4i16 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
7315 def : Pat<(v2i32 (extract_subvector (v4i32 VPR128:$Rn), (i64 0))),
7316 (v2i32 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
7317 def : Pat<(v1i64 (extract_subvector (v2i64 VPR128:$Rn), (i64 0))),
7318 (v1i64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
7319 def : Pat<(v2f32 (extract_subvector (v4f32 VPR128:$Rn), (i64 0))),
7320 (v2f32 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
7321 def : Pat<(v1f64 (extract_subvector (v2f64 VPR128:$Rn), (i64 0))),
7322 (v1f64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
7324 // The followings are for instruction class (3V Elem)
// NI_2VE: three-operand by-element instruction with accumulator
// ($src tied to $Rd), e.g. MLA/MLS: Rd.<ResS> (+)= Rn.<OpS> * Re.<EleOpS>[Index].
// NOTE(review): numbering gaps — itinerary/pattern lines of the class and
// the closing braces of the defs are missing from this excerpt.
7328 class NI_2VE<bit q, bit u, bits<2> size, bits<4> opcode,
7329 string asmop, string ResS, string OpS, string EleOpS,
7330 Operand OpImm, RegisterOperand ResVPR,
7331 RegisterOperand OpVPR, RegisterOperand EleOpVPR>
7332 : NeonI_2VElem<q, u, size, opcode,
7333 (outs ResVPR:$Rd), (ins ResVPR:$src, OpVPR:$Rn,
7334 EleOpVPR:$Re, OpImm:$Index),
7335 asmop # "\t$Rd." # ResS # ", $Rn." # OpS #
7336 ", $Re." # EleOpS # "[$Index]",
7342 let Constraints = "$src = $Rd";
// NI_2VE_v1: 2s/4s/4h/8h variants; element index bits are scattered into
// Inst{11}/Inst{21}(/Inst{20}) and the element register into Re bits.
7345 multiclass NI_2VE_v1<bit u, bits<4> opcode, string asmop> {
7346 // vector register class for element is always 128-bit to cover the max index
7347 def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
7348 neon_uimm2_bare, VPR64, VPR64, VPR128> {
7349 let Inst{11} = {Index{1}};
7350 let Inst{21} = {Index{0}};
7351 let Inst{20-16} = Re;
7354 def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
7355 neon_uimm2_bare, VPR128, VPR128, VPR128> {
7356 let Inst{11} = {Index{1}};
7357 let Inst{21} = {Index{0}};
7358 let Inst{20-16} = Re;
7361 // Index operations on 16-bit(H) elements are restricted to using v0-v15.
7362 def _4h8h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h",
7363 neon_uimm3_bare, VPR64, VPR64, VPR128Lo> {
7364 let Inst{11} = {Index{2}};
7365 let Inst{21} = {Index{1}};
7366 let Inst{20} = {Index{0}};
7367 let Inst{19-16} = Re{3-0};
7370 def _8h8h : NI_2VE<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h",
7371 neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
7372 let Inst{11} = {Index{2}};
7373 let Inst{21} = {Index{1}};
7374 let Inst{20} = {Index{0}};
7375 let Inst{19-16} = Re{3-0};
// Multiply-accumulate / multiply-subtract by element.
7379 defm MLAvve : NI_2VE_v1<0b1, 0b0000, "mla">;
7380 defm MLSvve : NI_2VE_v1<0b1, 0b0100, "mls">;
7382 // Pattern for lane in 128-bit vector
// NI_2VE_laneq: accumulate op where the multiplier is a duplicated lane of
// a 128-bit element register — select the by-element instruction directly.
7383 class NI_2VE_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
7384 RegisterOperand ResVPR, RegisterOperand OpVPR,
7385 RegisterOperand EleOpVPR, ValueType ResTy, ValueType OpTy,
7387 : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn),
7388 (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
7389 (INST ResVPR:$src, OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
7391 // Pattern for lane in 64-bit vector
// NI_2VE_lane: same, but the element register is 64-bit and must be widened
// with SUBREG_TO_REG before feeding the 128-bit-element instruction.
7392 class NI_2VE_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
7393 RegisterOperand ResVPR, RegisterOperand OpVPR,
7394 RegisterOperand EleOpVPR, ValueType ResTy, ValueType OpTy,
7396 : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn),
7397 (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
7398 (INST ResVPR:$src, OpVPR:$Rn,
7399 (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
// Instantiate both pattern shapes for each integer MLA/MLS variant.
7401 multiclass NI_2VE_v1_pat<string subop, SDPatternOperator op>
7403 def : NI_2VE_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
7404 op, VPR64, VPR64, VPR128, v2i32, v2i32, v4i32>;
7406 def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
7407 op, VPR128, VPR128, VPR128, v4i32, v4i32, v4i32>;
7409 def : NI_2VE_laneq<!cast<Instruction>(subop # "_4h8h"), neon_uimm3_bare,
7410 op, VPR64, VPR64, VPR128Lo, v4i16, v4i16, v8i16>;
7412 def : NI_2VE_laneq<!cast<Instruction>(subop # "_8h8h"), neon_uimm3_bare,
7413 op, VPR128, VPR128, VPR128Lo, v8i16, v8i16, v8i16>;
7415 // Index can only be half of the max value for lane in 64-bit vector
7417 def : NI_2VE_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
7418 op, VPR64, VPR64, VPR64, v2i32, v2i32, v2i32>;
7420 def : NI_2VE_lane<!cast<Instruction>(subop # "_4h8h"), neon_uimm2_bare,
7421 op, VPR64, VPR64, VPR64Lo, v4i16, v4i16, v4i16>;
7424 defm MLA_lane_v1 : NI_2VE_v1_pat<"MLAvve", Neon_mla>;
7425 defm MLS_lane_v1 : NI_2VE_v1_pat<"MLSvve", Neon_mls>;
// NI_2VE_2op: by-element instruction without accumulator (no tied $src),
// e.g. MUL/SQDMULH/SQRDMULH: Rd.<ResS> = Rn.<OpS> op Re.<EleOpS>[Index].
// NOTE(review): numbering gaps — itinerary/pattern lines and closing braces
// are missing from this excerpt.
7427 class NI_2VE_2op<bit q, bit u, bits<2> size, bits<4> opcode,
7428 string asmop, string ResS, string OpS, string EleOpS,
7429 Operand OpImm, RegisterOperand ResVPR,
7430 RegisterOperand OpVPR, RegisterOperand EleOpVPR>
7431 : NeonI_2VElem<q, u, size, opcode,
7432 (outs ResVPR:$Rd), (ins OpVPR:$Rn,
7433 EleOpVPR:$Re, OpImm:$Index),
7434 asmop # "\t$Rd." # ResS # ", $Rn." # OpS #
7435 ", $Re." # EleOpS # "[$Index]",
// Same index/register encoding scheme as NI_2VE_v1.
7442 multiclass NI_2VE_v1_2op<bit u, bits<4> opcode, string asmop> {
7443 // vector register class for element is always 128-bit to cover the max index
7444 def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
7445 neon_uimm2_bare, VPR64, VPR64, VPR128> {
7446 let Inst{11} = {Index{1}};
7447 let Inst{21} = {Index{0}};
7448 let Inst{20-16} = Re;
7451 def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
7452 neon_uimm2_bare, VPR128, VPR128, VPR128> {
7453 let Inst{11} = {Index{1}};
7454 let Inst{21} = {Index{0}};
7455 let Inst{20-16} = Re;
7458 // Index operations on 16-bit(H) elements are restricted to using v0-v15.
7459 def _4h8h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h",
7460 neon_uimm3_bare, VPR64, VPR64, VPR128Lo> {
7461 let Inst{11} = {Index{2}};
7462 let Inst{21} = {Index{1}};
7463 let Inst{20} = {Index{0}};
7464 let Inst{19-16} = Re{3-0};
7467 def _8h8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h",
7468 neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
7469 let Inst{11} = {Index{2}};
7470 let Inst{21} = {Index{1}};
7471 let Inst{20} = {Index{0}};
7472 let Inst{19-16} = Re{3-0};
7476 defm MULve : NI_2VE_v1_2op<0b0, 0b1000, "mul">;
7477 defm SQDMULHve : NI_2VE_v1_2op<0b0, 0b1100, "sqdmulh">;
7478 defm SQRDMULHve : NI_2VE_v1_2op<0b0, 0b1101, "sqrdmulh">;
7480 // Pattern for lane in 128-bit vector
// NI_2VE_mul_laneq: non-accumulating by-element op with a duplicated lane
// of a 128-bit element register — select the instruction directly.
7481 class NI_2VE_mul_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
7482 RegisterOperand OpVPR, RegisterOperand EleOpVPR,
7483 ValueType ResTy, ValueType OpTy, ValueType EleOpTy>
7484 : Pat<(ResTy (op (OpTy OpVPR:$Rn),
7485 (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
7486 (INST OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
7488 // Pattern for lane in 64-bit vector
// NI_2VE_mul_lane: same, with a 64-bit element register widened via
// SUBREG_TO_REG (instruction operand partly elided by a missing line).
7489 class NI_2VE_mul_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
7490 RegisterOperand OpVPR, RegisterOperand EleOpVPR,
7491 ValueType ResTy, ValueType OpTy, ValueType EleOpTy>
7492 : Pat<(ResTy (op (OpTy OpVPR:$Rn),
7493 (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
7495 (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
// Instantiate both shapes for MUL and the saturating doubling multiplies.
7497 multiclass NI_2VE_mul_v1_pat<string subop, SDPatternOperator op> {
7498 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
7499 op, VPR64, VPR128, v2i32, v2i32, v4i32>;
7501 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
7502 op, VPR128, VPR128, v4i32, v4i32, v4i32>;
7504 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4h8h"), neon_uimm3_bare,
7505 op, VPR64, VPR128Lo, v4i16, v4i16, v8i16>;
7507 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_8h8h"), neon_uimm3_bare,
7508 op, VPR128, VPR128Lo, v8i16, v8i16, v8i16>;
7510 // Index can only be half of the max value for lane in 64-bit vector
7512 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
7513 op, VPR64, VPR64, v2i32, v2i32, v2i32>;
7515 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4h8h"), neon_uimm2_bare,
7516 op, VPR64, VPR64Lo, v4i16, v4i16, v4i16>;
7519 defm MUL_lane_v1 : NI_2VE_mul_v1_pat<"MULve", mul>;
7520 defm SQDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQDMULHve", int_arm_neon_vqdmulh>;
7521 defm SQRDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQRDMULHve", int_arm_neon_vqrdmulh>;
// NI_2VE_v2_2op: FP by-element variants (2s/4s/2d) for FMUL/FMULX.
// NOTE(review): numbering gaps — closing braces and some index-bit lines
// (e.g. for _2d2d) are missing from this excerpt.
7525 multiclass NI_2VE_v2_2op<bit u, bits<4> opcode, string asmop> {
7526 // vector register class for element is always 128-bit to cover the max index
7527 def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
7528 neon_uimm2_bare, VPR64, VPR64, VPR128> {
7529 let Inst{11} = {Index{1}};
7530 let Inst{21} = {Index{0}};
7531 let Inst{20-16} = Re;
7534 def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
7535 neon_uimm2_bare, VPR128, VPR128, VPR128> {
7536 let Inst{11} = {Index{1}};
7537 let Inst{21} = {Index{0}};
7538 let Inst{20-16} = Re;
7541 // _1d2d doesn't exist!
7543 def _2d2d : NI_2VE_2op<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d",
7544 neon_uimm1_bare, VPR128, VPR128, VPR128> {
7545 let Inst{11} = {Index{0}};
7547 let Inst{20-16} = Re;
7551 defm FMULve : NI_2VE_v2_2op<0b0, 0b1001, "fmul">;
7552 defm FMULXve : NI_2VE_v2_2op<0b1, 0b1001, "fmulx">;
// NI_2VE_mul_lane_2d: the v1f64 "lane" case — coreop (Neon_combine_2d in
// the instantiations) builds a v2f64 from the scalar pair; lane index is
// always 0 after widening the 64-bit element register.
7554 class NI_2VE_mul_lane_2d<Instruction INST, Operand OpImm, SDPatternOperator op,
7555 RegisterOperand OpVPR, RegisterOperand EleOpVPR,
7556 ValueType ResTy, ValueType OpTy, ValueType EleOpTy,
7557 SDPatternOperator coreop>
7558 : Pat<(ResTy (op (OpTy OpVPR:$Rn),
7559 (OpTy (coreop (EleOpTy EleOpVPR:$Re), (EleOpTy EleOpVPR:$Re))))),
7561 (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), 0)>;
// FP by-element multiply pattern instantiations.
7563 multiclass NI_2VE_mul_v2_pat<string subop, SDPatternOperator op> {
7564 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
7565 op, VPR64, VPR128, v2f32, v2f32, v4f32>;
7567 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
7568 op, VPR128, VPR128, v4f32, v4f32, v4f32>;
7570 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2d2d"), neon_uimm1_bare,
7571 op, VPR128, VPR128, v2f64, v2f64, v2f64>;
7573 // Index can only be half of the max value for lane in 64-bit vector
7575 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
7576 op, VPR64, VPR64, v2f32, v2f32, v2f32>;
7578 def : NI_2VE_mul_lane_2d<!cast<Instruction>(subop # "_2d2d"), neon_uimm1_bare,
7579 op, VPR128, VPR64, v2f64, v2f64, v1f64,
7580 BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>;
7583 defm FMUL_lane_v2 : NI_2VE_mul_v2_pat<"FMULve", fmul>;
7584 defm FMULX_lane_v2 : NI_2VE_mul_v2_pat<"FMULXve", int_aarch64_neon_vmulx>;
// fmul by a dup'ed FP scalar: widen the scalar into a vector register and
// use the by-element form with lane 0.
7586 def : Pat<(v2f32 (fmul (v2f32 (Neon_vdup (f32 FPR32:$Re))),
7587 (v2f32 VPR64:$Rn))),
7588 (FMULve_2s4s VPR64:$Rn, (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>;
7590 def : Pat<(v4f32 (fmul (v4f32 (Neon_vdup (f32 FPR32:$Re))),
7591 (v4f32 VPR128:$Rn))),
7592 (FMULve_4s4s VPR128:$Rn, (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>;
7594 def : Pat<(v2f64 (fmul (v2f64 (Neon_vdup (f64 FPR64:$Re))),
7595 (v2f64 VPR128:$Rn))),
7596 (FMULve_2d2d VPR128:$Rn, (SUBREG_TO_REG (i64 0), $Re, sub_64), 0)>;
7598 // The followings are patterns using fma
7599 // -ffp-contract=fast generates fma
// NI_2VE_v2: accumulating FP by-element variants (2s/4s/2d) for FMLA/FMLS.
// NOTE(review): numbering gaps — closing braces and one index-bit line of
// _2d2d are missing from this excerpt.
7601 multiclass NI_2VE_v2<bit u, bits<4> opcode, string asmop> {
7602 // vector register class for element is always 128-bit to cover the max index
7603 def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
7604 neon_uimm2_bare, VPR64, VPR64, VPR128> {
7605 let Inst{11} = {Index{1}};
7606 let Inst{21} = {Index{0}};
7607 let Inst{20-16} = Re;
7610 def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
7611 neon_uimm2_bare, VPR128, VPR128, VPR128> {
7612 let Inst{11} = {Index{1}};
7613 let Inst{21} = {Index{0}};
7614 let Inst{20-16} = Re;
7617 // _1d2d doesn't exist!
7619 def _2d2d : NI_2VE<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d",
7620 neon_uimm1_bare, VPR128, VPR128, VPR128> {
7621 let Inst{11} = {Index{0}};
7623 let Inst{20-16} = Re;
7627 defm FMLAvve : NI_2VE_v2<0b0, 0b0001, "fmla">;
7628 defm FMLSvve : NI_2VE_v2<0b0, 0b0101, "fmls">;
7630 // Pattern for lane in 128-bit vector
// NI_2VEswap_laneq: fma-shaped op with operand order swapped relative to
// NI_2VE_laneq — the duplicated lane is the FIRST operand of op.
7631 class NI_2VEswap_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
7632 RegisterOperand ResVPR, RegisterOperand OpVPR,
7633 ValueType ResTy, ValueType OpTy,
7634 SDPatternOperator coreop>
7635 : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))),
7636 (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))),
7637 (INST ResVPR:$src, ResVPR:$Rn, OpVPR:$Re, OpImm:$Index)>;
7639 // Pattern for lane 0
// NI_2VEfma_lane0: multiplier is Neon_vdup of an f32 scalar — widen the
// scalar and use lane 0 of the by-element instruction.
7640 class NI_2VEfma_lane0<Instruction INST, SDPatternOperator op,
7641 RegisterOperand ResVPR, ValueType ResTy>
7642 : Pat<(ResTy (op (ResTy ResVPR:$Rn),
7643 (ResTy (Neon_vdup (f32 FPR32:$Re))),
7644 (ResTy ResVPR:$src))),
7645 (INST ResVPR:$src, ResVPR:$Rn,
7646 (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>;
7648 // Pattern for lane in 64-bit vector
// NI_2VEswap_lane: 64-bit element register variant — widen $Re first.
7649 class NI_2VEswap_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
7650 RegisterOperand ResVPR, RegisterOperand OpVPR,
7651 ValueType ResTy, ValueType OpTy,
7652 SDPatternOperator coreop>
7653 : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))),
7654 (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))),
7655 (INST ResVPR:$src, ResVPR:$Rn,
7656 (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), OpImm:$Index)>;
7658 // Pattern for lane in 64-bit vector
// NI_2VEswap_lane_2d2d: v1f64 case; coreop pairs the scalar with itself
// (Neon_combine_2d in the instantiations), lane index fixed at 0.
7659 class NI_2VEswap_lane_2d2d<Instruction INST, Operand OpImm,
7660 SDPatternOperator op,
7661 RegisterOperand ResVPR, RegisterOperand OpVPR,
7662 ValueType ResTy, ValueType OpTy,
7663 SDPatternOperator coreop>
7664 : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (OpTy OpVPR:$Re))),
7665 (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))),
7666 (INST ResVPR:$src, ResVPR:$Rn,
7667 (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), 0)>;
// FMLA by-element pattern instantiations (op = fma).
7670 multiclass NI_2VE_fma_v2_pat<string subop, SDPatternOperator op> {
7671 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
7672 neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
7673 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
7675 def : NI_2VEfma_lane0<!cast<Instruction>(subop # "_2s4s"),
7678 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
7679 neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
7680 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
7682 def : NI_2VEfma_lane0<!cast<Instruction>(subop # "_4s4s"),
7685 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
7686 neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
7687 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
7689 // Index can only be half of the max value for lane in 64-bit vector
7691 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
7692 neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
7693 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
7695 def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
7696 neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
7697 BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>;
7700 defm FMLA_lane_v2_s : NI_2VE_fma_v2_pat<"FMLAvve", fma>;
7702 // Pattern for lane 0
// NI_2VEfms_lane0: fms = fma with a negated first multiplicand; matches
// fneg on $Rn with a Neon_vdup'ed f32 scalar multiplier at lane 0.
7703 class NI_2VEfms_lane0<Instruction INST, SDPatternOperator op,
7704 RegisterOperand ResVPR, ValueType ResTy>
7705 : Pat<(ResTy (op (ResTy (fneg ResVPR:$Rn)),
7706 (ResTy (Neon_vdup (f32 FPR32:$Re))),
7707 (ResTy ResVPR:$src))),
7708 (INST ResVPR:$src, ResVPR:$Rn,
7709 (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>;
// FMLS by-element pattern instantiations. Each shape is written twice
// because the fneg may sit outside or inside the Neon_vduplane.
7711 multiclass NI_2VE_fms_v2_pat<string subop, SDPatternOperator op>
7713 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
7714 neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
7715 BinOpFrag<(fneg (Neon_vduplane node:$LHS, node:$RHS))>>;
7717 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
7718 neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
7719 BinOpFrag<(Neon_vduplane
7720 (fneg node:$LHS), node:$RHS)>>;
7722 def : NI_2VEfms_lane0<!cast<Instruction>(subop # "_2s4s"),
7725 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
7726 neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
7727 BinOpFrag<(fneg (Neon_vduplane
7728 node:$LHS, node:$RHS))>>;
7730 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
7731 neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
7732 BinOpFrag<(Neon_vduplane
7733 (fneg node:$LHS), node:$RHS)>>;
7735 def : NI_2VEfms_lane0<!cast<Instruction>(subop # "_4s4s"),
7738 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
7739 neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
7740 BinOpFrag<(fneg (Neon_vduplane
7741 node:$LHS, node:$RHS))>>;
7743 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
7744 neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
7745 BinOpFrag<(Neon_vduplane
7746 (fneg node:$LHS), node:$RHS)>>;
7748 // Index can only be half of the max value for lane in 64-bit vector
7750 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
7751 neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
7752 BinOpFrag<(fneg (Neon_vduplane
7753 node:$LHS, node:$RHS))>>;
7755 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
7756 neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
7757 BinOpFrag<(Neon_vduplane
7758 (fneg node:$LHS), node:$RHS)>>;
7760 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_4s4s"),
7761 neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32,
7762 BinOpFrag<(fneg (Neon_vduplane node:$LHS, node:$RHS))>>;
7764 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_4s4s"),
7765 neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32,
7766 BinOpFrag<(Neon_vduplane (fneg node:$LHS), node:$RHS)>>;
// For the v1f64 combine case, negating the combined vector equals
// combining the two negated scalars — hence the second form below.
7768 def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
7769 neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
7770 BinOpFrag<(fneg (Neon_combine_2d
7771 node:$LHS, node:$RHS))>>;
7773 def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
7774 neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
7775 BinOpFrag<(Neon_combine_2d
7776 (fneg node:$LHS), (fneg node:$RHS))>>;
7779 defm FMLS_lane_v2_s : NI_2VE_fms_v2_pat<"FMLSvve", fma>;
7781 // Variant 3: Long type
7782 // E.g. SMLAL : 4S/4H/H (v0-v15), 2D/2S/S
7783 //      SMLAL2: 4S/8H/H (v0-v15), 2D/4S/S
// NI_2VE_v3: widening (long) accumulating by-element ops; the "2" asmop
// suffix marks the high-half (Q = 1) source forms.
// NOTE(review): numbering gaps — closing braces are missing here.
7785 multiclass NI_2VE_v3<bit u, bits<4> opcode, string asmop> {
7786 // vector register class for element is always 128-bit to cover the max index
7787 def _2d2s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s",
7788 neon_uimm2_bare, VPR128, VPR64, VPR128> {
7789 let Inst{11} = {Index{1}};
7790 let Inst{21} = {Index{0}};
7791 let Inst{20-16} = Re;
7794 def _2d4s : NI_2VE<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s",
7795 neon_uimm2_bare, VPR128, VPR128, VPR128> {
7796 let Inst{11} = {Index{1}};
7797 let Inst{21} = {Index{0}};
7798 let Inst{20-16} = Re;
7801 // Index operations on 16-bit(H) elements are restricted to using v0-v15.
7802 def _4s8h : NI_2VE<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h",
7803 neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
7804 let Inst{11} = {Index{2}};
7805 let Inst{21} = {Index{1}};
7806 let Inst{20} = {Index{0}};
7807 let Inst{19-16} = Re{3-0};
7810 def _4s4h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h",
7811 neon_uimm3_bare, VPR128, VPR64, VPR128Lo> {
7812 let Inst{11} = {Index{2}};
7813 let Inst{21} = {Index{1}};
7814 let Inst{20} = {Index{0}};
7815 let Inst{19-16} = Re{3-0};
// Long multiply-accumulate/subtract by element, signed/unsigned and
// saturating-doubling variants.
7819 defm SMLALvve : NI_2VE_v3<0b0, 0b0010, "smlal">;
7820 defm UMLALvve : NI_2VE_v3<0b1, 0b0010, "umlal">;
7821 defm SMLSLvve : NI_2VE_v3<0b0, 0b0110, "smlsl">;
7822 defm UMLSLvve : NI_2VE_v3<0b1, 0b0110, "umlsl">;
7823 defm SQDMLALvve : NI_2VE_v3<0b0, 0b0011, "sqdmlal">;
7824 defm SQDMLSLvve : NI_2VE_v3<0b0, 0b0111, "sqdmlsl">;
// Two-operand (no accumulator) counterpart of NI_2VE_v3: long-form
// multiply by element. Same size/index encoding layout as NI_2VE_v3,
// but built on the NI_2VE_2op instruction class.
// NOTE(review): sampled view — closing braces between inner defs are not
// visible here.
7826 multiclass NI_2VE_v3_2op<bit u, bits<4> opcode, string asmop> {
7827 // vector register class for element is always 128-bit to cover the max index
7828 def _2d2s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s",
7829 neon_uimm2_bare, VPR128, VPR64, VPR128> {
7830 let Inst{11} = {Index{1}};
7831 let Inst{21} = {Index{0}};
7832 let Inst{20-16} = Re;
7835 def _2d4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s",
7836 neon_uimm2_bare, VPR128, VPR128, VPR128> {
7837 let Inst{11} = {Index{1}};
7838 let Inst{21} = {Index{0}};
7839 let Inst{20-16} = Re;
7842 // Index operations on 16-bit(H) elements are restricted to using v0-v15.
7843 def _4s8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h",
7844 neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
7845 let Inst{11} = {Index{2}};
7846 let Inst{21} = {Index{1}};
7847 let Inst{20} = {Index{0}};
7848 let Inst{19-16} = Re{3-0};
7851 def _4s4h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h",
7852 neon_uimm3_bare, VPR128, VPR64, VPR128Lo> {
7853 let Inst{11} = {Index{2}};
7854 let Inst{21} = {Index{1}};
7855 let Inst{20} = {Index{0}};
7856 let Inst{19-16} = Re{3-0};
// Widening multiplies by element: signed, unsigned, and signed saturating
// doubling.
7860 defm SMULLve : NI_2VE_v3_2op<0b0, 0b1010, "smull">;
7861 defm UMULLve : NI_2VE_v3_2op<0b1, 0b1010, "umull">;
7862 defm SQDMULLve : NI_2VE_v3_2op<0b0, 0b1011, "sqdmull">;
// Select a v1f64 built from a scalar f64 register.
// NOTE(review): the result side of this Pat is not visible in this sampled
// view — confirm against the full file before relying on it.
7864 def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$src))),
7867 // Pattern for lane in 128-bit vector
// Matches a long-form accumulate op whose Rn operand is the high half of a
// 128-bit vector (extracted by `hiop`) and whose second operand is a lane
// duplicated from a 128-bit element register; selects the "*2" instruction
// directly, passing the lane index through unchanged.
7868 class NI_2VEL2_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
7869 RegisterOperand EleOpVPR, ValueType ResTy,
7870 ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
7871 SDPatternOperator hiop>
7872 : Pat<(ResTy (op (ResTy VPR128:$src),
7873 (HalfOpTy (hiop (OpTy VPR128:$Rn))),
7874 (HalfOpTy (Neon_vduplane
7875 (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
7876 (INST VPR128:$src, VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
7878 // Pattern for lane in 64-bit vector
// Same as NI_2VEL2_laneq, but the element register is 64-bit, so it is
// widened to 128-bit with SUBREG_TO_REG (placing it in sub_64) before being
// fed to the instruction; the lane index is in the 64-bit vector's range.
7879 class NI_2VEL2_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
7880 RegisterOperand EleOpVPR, ValueType ResTy,
7881 ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
7882 SDPatternOperator hiop>
7883 : Pat<(ResTy (op (ResTy VPR128:$src),
7884 (HalfOpTy (hiop (OpTy VPR128:$Rn))),
7885 (HalfOpTy (Neon_vduplane
7886 (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
7887 (INST VPR128:$src, VPR128:$Rn,
7888 (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
// Fixed lane-0 form: when the element operand is a dup of a GPR value
// (Neon_vdup of a GPR32), materialize it with DupInst and use lane index 0.
7890 class NI_2VEL2_lane0<Instruction INST, SDPatternOperator op,
7891 ValueType ResTy, ValueType OpTy, ValueType HalfOpTy,
7892 SDPatternOperator hiop, Instruction DupInst>
7893 : Pat<(ResTy (op (ResTy VPR128:$src),
7894 (HalfOpTy (hiop (OpTy VPR128:$Rn))),
7895 (HalfOpTy (Neon_vdup (i32 GPR32:$Re))))),
7896 (INST VPR128:$src, VPR128:$Rn, (DupInst $Re), 0)>;
// Selection patterns for the long-form accumulating by-element instructions
// (SMLAL/UMLAL/SMLSL/UMLSL): low-half, high-half ("*2"), lane-0-dup, and
// 64-bit-element-register variants for each element size.
7898 multiclass NI_2VEL_v3_pat<string subop, SDPatternOperator op> {
7899 def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
7900 op, VPR128, VPR64, VPR128Lo, v4i32, v4i16, v8i16>;
7902 def : NI_2VE_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
7903 op, VPR128, VPR64, VPR128, v2i64, v2i32, v4i32>;
7905 def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
7906 op, VPR128Lo, v4i32, v8i16, v8i16, v4i16, Neon_High8H>;
7908 def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
7909 op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S>;
7911 def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_4s8h"),
7912 op, v4i32, v8i16, v4i16, Neon_High8H, DUP8h>;
7914 def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_2d4s"),
7915 op, v2i64, v4i32, v2i32, Neon_High4S, DUP4s>;
7917 // Index can only be half of the max value for lane in 64-bit vector
7919 def : NI_2VE_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
7920 op, VPR128, VPR64, VPR64Lo, v4i32, v4i16, v4i16>;
7922 def : NI_2VE_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
7923 op, VPR128, VPR64, VPR64, v2i64, v2i32, v2i32>;
7925 def : NI_2VEL2_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
7926 op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H>;
7928 def : NI_2VEL2_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
7929 op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S>;
// Instantiate the lane patterns for the four widening accumulate ops.
7932 defm SMLAL_lane_v3 : NI_2VEL_v3_pat<"SMLALvve", Neon_smlal>;
7933 defm UMLAL_lane_v3 : NI_2VEL_v3_pat<"UMLALvve", Neon_umlal>;
7934 defm SMLSL_lane_v3 : NI_2VEL_v3_pat<"SMLSLvve", Neon_smlsl>;
7935 defm UMLSL_lane_v3 : NI_2VEL_v3_pat<"UMLSLvve", Neon_umlsl>;
7937 // Pattern for lane in 128-bit vector
// Multiply-only (no accumulator $src) counterpart of NI_2VEL2_laneq.
// NOTE(review): the opening `: Pat<(ResTy (op ...` line is not visible in
// this sampled view (gap between original lines 7941 and 7943).
7938 class NI_2VEL2_mul_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
7939 RegisterOperand EleOpVPR, ValueType ResTy,
7940 ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
7941 SDPatternOperator hiop>
7943 (HalfOpTy (hiop (OpTy VPR128:$Rn))),
7944 (HalfOpTy (Neon_vduplane
7945 (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
7946 (INST VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
7948 // Pattern for lane in 64-bit vector
// Multiply-only counterpart of NI_2VEL2_lane: the 64-bit element register
// is widened via SUBREG_TO_REG before being used by the instruction.
// NOTE(review): the `: Pat<...` opening line and the `(INST VPR128:$Rn,`
// result line are not fully visible in this sampled view.
7949 class NI_2VEL2_mul_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
7950 RegisterOperand EleOpVPR, ValueType ResTy,
7951 ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
7952 SDPatternOperator hiop>
7954 (HalfOpTy (hiop (OpTy VPR128:$Rn))),
7955 (HalfOpTy (Neon_vduplane
7956 (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
7958 (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
7960 // Pattern for fixed lane 0
// Multiply-only counterpart of NI_2VEL2_lane0: the GPR dup becomes the
// element operand via DupInst with lane index 0.
// NOTE(review): the `: Pat<...` opening line is not visible in this
// sampled view.
7961 class NI_2VEL2_mul_lane0<Instruction INST, SDPatternOperator op,
7962 ValueType ResTy, ValueType OpTy, ValueType HalfOpTy,
7963 SDPatternOperator hiop, Instruction DupInst>
7965 (HalfOpTy (hiop (OpTy VPR128:$Rn))),
7966 (HalfOpTy (Neon_vdup (i32 GPR32:$Re))))),
7967 (INST VPR128:$Rn, (DupInst $Re), 0)>;
// Selection patterns for the long-form multiply-only by-element instructions
// (SMULL/UMULL/SQDMULL); structure mirrors NI_2VEL_v3_pat but without the
// accumulator operand.
7969 multiclass NI_2VEL_mul_v3_pat<string subop, SDPatternOperator op> {
7970 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
7971 op, VPR64, VPR128Lo, v4i32, v4i16, v8i16>;
7973 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
7974 op, VPR64, VPR128, v2i64, v2i32, v4i32>;
7976 def : NI_2VEL2_mul_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
7977 op, VPR128Lo, v4i32, v8i16, v8i16, v4i16, Neon_High8H>;
7979 def : NI_2VEL2_mul_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
7980 op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S>;
7982 def : NI_2VEL2_mul_lane0<!cast<Instruction>(subop # "_4s8h"),
7983 op, v4i32, v8i16, v4i16, Neon_High8H, DUP8h>;
7985 def : NI_2VEL2_mul_lane0<!cast<Instruction>(subop # "_2d4s"),
7986 op, v2i64, v4i32, v2i32, Neon_High4S, DUP4s>;
7988 // Index can only be half of the max value for lane in 64-bit vector
7990 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
7991 op, VPR64, VPR64Lo, v4i32, v4i16, v4i16>;
7993 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
7994 op, VPR64, VPR64, v2i64, v2i32, v2i32>;
7996 def : NI_2VEL2_mul_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
7997 op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H>;
7999 def : NI_2VEL2_mul_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
8000 op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S>;
// Instantiate for the three widening multiplies, mapped to the ARM NEON
// intrinsics shared with the 32-bit backend.
8003 defm SMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SMULLve", int_arm_neon_vmulls>;
8004 defm UMULL_lane_v3 : NI_2VEL_mul_v3_pat<"UMULLve", int_arm_neon_vmullu>;
8005 defm SQDMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SQDMULLve", int_arm_neon_vqdmull>;
// PatFrags composing a saturating doubling multiply (vqdmull) with a
// saturating accumulate `op` — i.e. the DAG shape of SQDMLAL/SQDMLSL.
// NOTE(review): the `(op node:$Ra, ...)` wrapper lines are not visible in
// this sampled view (gaps at original lines 8009/8013).
8007 multiclass NI_qdma<SDPatternOperator op> {
8008 def _4s : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
8010 (v4i32 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>;
8012 def _2d : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
8014 (v2i64 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>;
// qdmlal = saturating add of the doubled product; qdmlsl = saturating sub.
8017 defm Neon_qdmlal : NI_qdma<int_arm_neon_vqadds>;
8018 defm Neon_qdmlsl : NI_qdma<int_arm_neon_vqsubs>;
// Lane patterns for SQDMLAL/SQDMLSL: like NI_2VEL_v3_pat but the operator
// is looked up by name (`op # "_4s"` / `op # "_2d"`) from the NI_qdma
// PatFrags, since the fragment type differs per element size.
8020 multiclass NI_2VEL_v3_qdma_pat<string subop, string op> {
8021 def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
8022 !cast<PatFrag>(op # "_4s"), VPR128, VPR64, VPR128Lo,
8023 v4i32, v4i16, v8i16>;
8025 def : NI_2VE_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
8026 !cast<PatFrag>(op # "_2d"), VPR128, VPR64, VPR128,
8027 v2i64, v2i32, v4i32>;
8029 def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
8030 !cast<PatFrag>(op # "_4s"), VPR128Lo,
8031 v4i32, v8i16, v8i16, v4i16, Neon_High8H>;
8033 def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
8034 !cast<PatFrag>(op # "_2d"), VPR128,
8035 v2i64, v4i32, v4i32, v2i32, Neon_High4S>;
8037 def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_4s8h"),
8038 !cast<PatFrag>(op # "_4s"),
8039 v4i32, v8i16, v4i16, Neon_High8H, DUP8h>;
8041 def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_2d4s"),
8042 !cast<PatFrag>(op # "_2d"),
8043 v2i64, v4i32, v2i32, Neon_High4S, DUP4s>;
8045 // Index can only be half of the max value for lane in 64-bit vector
8047 def : NI_2VE_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
8048 !cast<PatFrag>(op # "_4s"), VPR128, VPR64, VPR64Lo,
8049 v4i32, v4i16, v4i16>;
8051 def : NI_2VE_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
8052 !cast<PatFrag>(op # "_2d"), VPR128, VPR64, VPR64,
8053 v2i64, v2i32, v2i32>;
8055 def : NI_2VEL2_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
8056 !cast<PatFrag>(op # "_4s"), VPR64Lo,
8057 v4i32, v8i16, v4i16, v4i16, Neon_High8H>;
8059 def : NI_2VEL2_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
8060 !cast<PatFrag>(op # "_2d"), VPR64,
8061 v2i64, v4i32, v2i32, v2i32, Neon_High4S>;
// Instantiate for the two saturating doubling accumulate instructions.
8064 defm SQDMLAL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLALvve", "Neon_qdmlal">;
8065 defm SQDMLSL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLSLvve", "Neon_qdmlsl">;
8067 // End of implementation for instruction class (3V Elem)
// Element-reversal instruction (REV16/REV32/REV64): a 2-register misc-class
// encoding whose DAG pattern maps Neon_Rev over the whole vector.
// NOTE(review): the trailing argument(s) after the pattern list are not
// visible in this sampled view.
8069 class NeonI_REV<string asmop, string Res, bits<2> size, bit Q, bit U,
8070 bits<5> opcode, RegisterOperand ResVPR, ValueType ResTy,
8071 SDPatternOperator Neon_Rev>
8072 : NeonI_2VMisc<Q, U, size, opcode,
8073 (outs ResVPR:$Rd), (ins ResVPR:$Rn),
8074 asmop # "\t$Rd." # Res # ", $Rn." # Res,
8075 [(set (ResTy ResVPR:$Rd),
8076 (ResTy (Neon_Rev (ResTy ResVPR:$Rn))))],
// REV64/REV32/REV16 instantiations for each element size and register width.
// NOTE(review): sampled view — the SDPatternOperator argument on each def's
// continuation line is not visible here.
8079 def REV64_16b : NeonI_REV<"rev64", "16b", 0b00, 0b1, 0b0, 0b00000, VPR128,
8081 def REV64_8h : NeonI_REV<"rev64", "8h", 0b01, 0b1, 0b0, 0b00000, VPR128,
8083 def REV64_4s : NeonI_REV<"rev64", "4s", 0b10, 0b1, 0b0, 0b00000, VPR128,
8085 def REV64_8b : NeonI_REV<"rev64", "8b", 0b00, 0b0, 0b0, 0b00000, VPR64,
8087 def REV64_4h : NeonI_REV<"rev64", "4h", 0b01, 0b0, 0b0, 0b00000, VPR64,
8089 def REV64_2s : NeonI_REV<"rev64", "2s", 0b10, 0b0, 0b0, 0b00000, VPR64,
// Reuse the integer REV64 on f32 vectors (bit-identical operation).
8092 def : Pat<(v4f32 (Neon_rev64 (v4f32 VPR128:$Rn))), (REV64_4s VPR128:$Rn)>;
8093 def : Pat<(v2f32 (Neon_rev64 (v2f32 VPR64:$Rn))), (REV64_2s VPR64:$Rn)>;
8095 def REV32_16b : NeonI_REV<"rev32", "16b", 0b00, 0b1, 0b1, 0b00000, VPR128,
8097 def REV32_8h : NeonI_REV<"rev32", "8h", 0b01, 0b1, 0b1, 0b00000, VPR128,
8099 def REV32_8b : NeonI_REV<"rev32", "8b", 0b00, 0b0, 0b1, 0b00000, VPR64,
8101 def REV32_4h : NeonI_REV<"rev32", "4h", 0b01, 0b0, 0b1, 0b00000, VPR64,
8104 def REV16_16b : NeonI_REV<"rev16", "16b", 0b00, 0b1, 0b0, 0b00001, VPR128,
8106 def REV16_8b : NeonI_REV<"rev16", "8b", 0b00, 0b0, 0b0, 0b00001, VPR64,
// Pairwise widening add (SADDLP/UADDLP): adds adjacent element pairs,
// producing a vector with half the element count at double the width
// (16b->8h, 8h->4s, 4s->2d, and the 64-bit-register halves).
8109 multiclass NeonI_PairwiseAdd<string asmop, bit U, bits<5> opcode,
8110 SDPatternOperator Neon_Padd> {
8111 def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
8112 (outs VPR128:$Rd), (ins VPR128:$Rn),
8113 asmop # "\t$Rd.8h, $Rn.16b",
8114 [(set (v8i16 VPR128:$Rd),
8115 (v8i16 (Neon_Padd (v16i8 VPR128:$Rn))))],
8118 def 8b4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
8119 (outs VPR64:$Rd), (ins VPR64:$Rn),
8120 asmop # "\t$Rd.4h, $Rn.8b",
8121 [(set (v4i16 VPR64:$Rd),
8122 (v4i16 (Neon_Padd (v8i8 VPR64:$Rn))))],
8125 def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
8126 (outs VPR128:$Rd), (ins VPR128:$Rn),
8127 asmop # "\t$Rd.4s, $Rn.8h",
8128 [(set (v4i32 VPR128:$Rd),
8129 (v4i32 (Neon_Padd (v8i16 VPR128:$Rn))))],
8132 def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
8133 (outs VPR64:$Rd), (ins VPR64:$Rn),
8134 asmop # "\t$Rd.2s, $Rn.4h",
8135 [(set (v2i32 VPR64:$Rd),
8136 (v2i32 (Neon_Padd (v4i16 VPR64:$Rn))))],
8139 def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
8140 (outs VPR128:$Rd), (ins VPR128:$Rn),
8141 asmop # "\t$Rd.2d, $Rn.4s",
8142 [(set (v2i64 VPR128:$Rd),
8143 (v2i64 (Neon_Padd (v4i32 VPR128:$Rn))))],
8146 def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode,
8147 (outs VPR64:$Rd), (ins VPR64:$Rn),
8148 asmop # "\t$Rd.1d, $Rn.2s",
8149 [(set (v1i64 VPR64:$Rd),
8150 (v1i64 (Neon_Padd (v2i32 VPR64:$Rn))))],
8154 defm SADDLP : NeonI_PairwiseAdd<"saddlp", 0b0, 0b00010,
8155 int_arm_neon_vpaddls>;
8156 defm UADDLP : NeonI_PairwiseAdd<"uaddlp", 0b1, 0b00010,
8157 int_arm_neon_vpaddlu>;
// Map the AArch64 across-vector long-add intrinsics on v2i32 as well.
// NOTE(review): the result sides of these two Pats (original lines
// 8160/8162) are not visible in this sampled view.
8159 def : Pat<(v1i64 (int_aarch64_neon_saddlv (v2i32 VPR64:$Rn))),
8161 def : Pat<(v1i64 (int_aarch64_neon_uaddlv (v2i32 VPR64:$Rn))),
// Pairwise widening add-and-accumulate (SADALP/UADALP): same widening as
// NeonI_PairwiseAdd but accumulates into $src, tied to $Rd.
// NOTE(review): sampled view — the `(Neon_Padd` opener of each pattern's
// inner node is on lines not visible here.
8164 multiclass NeonI_PairwiseAddAcc<string asmop, bit U, bits<5> opcode,
8165 SDPatternOperator Neon_Padd> {
8166 let Constraints = "$src = $Rd" in {
8167 def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
8168 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
8169 asmop # "\t$Rd.8h, $Rn.16b",
8170 [(set (v8i16 VPR128:$Rd),
8172 (v8i16 VPR128:$src), (v16i8 VPR128:$Rn))))],
8175 def 8b4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
8176 (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
8177 asmop # "\t$Rd.4h, $Rn.8b",
8178 [(set (v4i16 VPR64:$Rd),
8180 (v4i16 VPR64:$src), (v8i8 VPR64:$Rn))))],
8183 def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
8184 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
8185 asmop # "\t$Rd.4s, $Rn.8h",
8186 [(set (v4i32 VPR128:$Rd),
8188 (v4i32 VPR128:$src), (v8i16 VPR128:$Rn))))],
8191 def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
8192 (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
8193 asmop # "\t$Rd.2s, $Rn.4h",
8194 [(set (v2i32 VPR64:$Rd),
8196 (v2i32 VPR64:$src), (v4i16 VPR64:$Rn))))],
8199 def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
8200 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
8201 asmop # "\t$Rd.2d, $Rn.4s",
8202 [(set (v2i64 VPR128:$Rd),
8204 (v2i64 VPR128:$src), (v4i32 VPR128:$Rn))))],
8207 def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode,
8208 (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
8209 asmop # "\t$Rd.1d, $Rn.2s",
8210 [(set (v1i64 VPR64:$Rd),
8212 (v1i64 VPR64:$src), (v2i32 VPR64:$Rn))))],
8217 defm SADALP : NeonI_PairwiseAddAcc<"sadalp", 0b0, 0b00110,
8218 int_arm_neon_vpadals>;
8219 defm UADALP : NeonI_PairwiseAddAcc<"uadalp", 0b1, 0b00110,
8220 int_arm_neon_vpadalu>;
// One-operand 2-register misc instructions over all B/H/S/D element sizes
// and both register widths (16b/8h/4s/2d and 8b/4h/2s). Patterns are
// attached separately via NeonI_2VMisc_BHSD_1Arg_Pattern.
8222 multiclass NeonI_2VMisc_BHSDsize_1Arg<string asmop, bit U, bits<5> opcode> {
8223 def 16b : NeonI_2VMisc<0b1, U, 0b00, opcode,
8224 (outs VPR128:$Rd), (ins VPR128:$Rn),
8225 asmop # "\t$Rd.16b, $Rn.16b",
8228 def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode,
8229 (outs VPR128:$Rd), (ins VPR128:$Rn),
8230 asmop # "\t$Rd.8h, $Rn.8h",
8233 def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
8234 (outs VPR128:$Rd), (ins VPR128:$Rn),
8235 asmop # "\t$Rd.4s, $Rn.4s",
8238 def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode,
8239 (outs VPR128:$Rd), (ins VPR128:$Rn),
8240 asmop # "\t$Rd.2d, $Rn.2d",
8243 def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode,
8244 (outs VPR64:$Rd), (ins VPR64:$Rn),
8245 asmop # "\t$Rd.8b, $Rn.8b",
8248 def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode,
8249 (outs VPR64:$Rd), (ins VPR64:$Rn),
8250 asmop # "\t$Rd.4h, $Rn.4h",
8253 def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
8254 (outs VPR64:$Rd), (ins VPR64:$Rn),
8255 asmop # "\t$Rd.2s, $Rn.2s",
// Saturating abs/neg and plain abs/neg instructions.
8259 defm SQABS : NeonI_2VMisc_BHSDsize_1Arg<"sqabs", 0b0, 0b00111>;
8260 defm SQNEG : NeonI_2VMisc_BHSDsize_1Arg<"sqneg", 0b1, 0b00111>;
8261 defm ABS : NeonI_2VMisc_BHSDsize_1Arg<"abs", 0b0, 0b01011>;
8262 defm NEG : NeonI_2VMisc_BHSDsize_1Arg<"neg", 0b1, 0b01011>;
// Attach selection patterns for one-operand B/H/S/D misc instructions:
// maps Neon_Op on each vector type to the suffixed instruction `Prefix#size`.
8264 multiclass NeonI_2VMisc_BHSD_1Arg_Pattern<string Prefix,
8265 SDPatternOperator Neon_Op> {
8266 def : Pat<(v16i8 (Neon_Op (v16i8 VPR128:$Rn))),
8267 (v16i8 (!cast<Instruction>(Prefix # 16b) (v16i8 VPR128:$Rn)))>;
8269 def : Pat<(v8i16 (Neon_Op (v8i16 VPR128:$Rn))),
8270 (v8i16 (!cast<Instruction>(Prefix # 8h) (v8i16 VPR128:$Rn)))>;
8272 def : Pat<(v4i32 (Neon_Op (v4i32 VPR128:$Rn))),
8273 (v4i32 (!cast<Instruction>(Prefix # 4s) (v4i32 VPR128:$Rn)))>;
8275 def : Pat<(v2i64 (Neon_Op (v2i64 VPR128:$Rn))),
8276 (v2i64 (!cast<Instruction>(Prefix # 2d) (v2i64 VPR128:$Rn)))>;
8278 def : Pat<(v8i8 (Neon_Op (v8i8 VPR64:$Rn))),
8279 (v8i8 (!cast<Instruction>(Prefix # 8b) (v8i8 VPR64:$Rn)))>;
8281 def : Pat<(v4i16 (Neon_Op (v4i16 VPR64:$Rn))),
8282 (v4i16 (!cast<Instruction>(Prefix # 4h) (v4i16 VPR64:$Rn)))>;
8284 def : Pat<(v2i32 (Neon_Op (v2i32 VPR64:$Rn))),
8285 (v2i32 (!cast<Instruction>(Prefix # 2s) (v2i32 VPR64:$Rn)))>;
// SQABS/SQNEG/ABS map directly to their ARM NEON intrinsics; NEG is
// handled separately below via (sub 0, x) patterns.
8288 defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"SQABS", int_arm_neon_vqabs>;
8289 defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"SQNEG", int_arm_neon_vqneg>;
8290 defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"ABS", int_arm_neon_vabs>;
// Select vector NEG for (sub 0, x). For non-byte element types the
// all-zero splat appears as a bitconverted v16i8/v8i8 Neon_AllZero.
8292 def : Pat<(v16i8 (sub
8293 (v16i8 Neon_AllZero),
8294 (v16i8 VPR128:$Rn))),
8295 (v16i8 (NEG16b (v16i8 VPR128:$Rn)))>;
8296 def : Pat<(v8i8 (sub
8297 (v8i8 Neon_AllZero),
8299 (v8i8 (NEG8b (v8i8 VPR64:$Rn)))>;
8300 def : Pat<(v8i16 (sub
8301 (v8i16 (bitconvert (v16i8 Neon_AllZero))),
8302 (v8i16 VPR128:$Rn))),
8303 (v8i16 (NEG8h (v8i16 VPR128:$Rn)))>;
8304 def : Pat<(v4i16 (sub
8305 (v4i16 (bitconvert (v8i8 Neon_AllZero))),
8306 (v4i16 VPR64:$Rn))),
8307 (v4i16 (NEG4h (v4i16 VPR64:$Rn)))>;
8308 def : Pat<(v4i32 (sub
8309 (v4i32 (bitconvert (v16i8 Neon_AllZero))),
8310 (v4i32 VPR128:$Rn))),
8311 (v4i32 (NEG4s (v4i32 VPR128:$Rn)))>;
8312 def : Pat<(v2i32 (sub
8313 (v2i32 (bitconvert (v8i8 Neon_AllZero))),
8314 (v2i32 VPR64:$Rn))),
8315 (v2i32 (NEG2s (v2i32 VPR64:$Rn)))>;
8316 def : Pat<(v2i64 (sub
8317 (v2i64 (bitconvert (v16i8 Neon_AllZero))),
8318 (v2i64 VPR128:$Rn))),
8319 (v2i64 (NEG2d (v2i64 VPR128:$Rn)))>;
// Two-operand B/H/S/D misc instructions where the destination is also a
// source ($src tied to $Rd) — used for SUQADD/USQADD accumulating forms.
8321 multiclass NeonI_2VMisc_BHSDsize_2Args<string asmop, bit U, bits<5> opcode> {
8322 let Constraints = "$src = $Rd" in {
8323 def 16b : NeonI_2VMisc<0b1, U, 0b00, opcode,
8324 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
8325 asmop # "\t$Rd.16b, $Rn.16b",
8328 def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode,
8329 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
8330 asmop # "\t$Rd.8h, $Rn.8h",
8333 def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
8334 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
8335 asmop # "\t$Rd.4s, $Rn.4s",
8338 def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode,
8339 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
8340 asmop # "\t$Rd.2d, $Rn.2d",
8343 def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode,
8344 (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
8345 asmop # "\t$Rd.8b, $Rn.8b",
8348 def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode,
8349 (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
8350 asmop # "\t$Rd.4h, $Rn.4h",
8353 def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
8354 (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
8355 asmop # "\t$Rd.2s, $Rn.2s",
// Signed+unsigned / unsigned+signed saturating accumulate.
8360 defm SUQADD : NeonI_2VMisc_BHSDsize_2Args<"suqadd", 0b0, 0b00011>;
8361 defm USQADD : NeonI_2VMisc_BHSDsize_2Args<"usqadd", 0b1, 0b00011>;
// Attach selection patterns for the two-operand (accumulating) B/H/S/D
// misc instructions: (Neon_Op src, Rn) -> `Prefix#size` src, Rn.
8363 multiclass NeonI_2VMisc_BHSD_2Args_Pattern<string Prefix,
8364 SDPatternOperator Neon_Op> {
8365 def : Pat<(v16i8 (Neon_Op (v16i8 VPR128:$src), (v16i8 VPR128:$Rn))),
8366 (v16i8 (!cast<Instruction>(Prefix # 16b)
8367 (v16i8 VPR128:$src), (v16i8 VPR128:$Rn)))>;
8369 def : Pat<(v8i16 (Neon_Op (v8i16 VPR128:$src), (v8i16 VPR128:$Rn))),
8370 (v8i16 (!cast<Instruction>(Prefix # 8h)
8371 (v8i16 VPR128:$src), (v8i16 VPR128:$Rn)))>;
8373 def : Pat<(v4i32 (Neon_Op (v4i32 VPR128:$src), (v4i32 VPR128:$Rn))),
8374 (v4i32 (!cast<Instruction>(Prefix # 4s)
8375 (v4i32 VPR128:$src), (v4i32 VPR128:$Rn)))>;
8377 def : Pat<(v2i64 (Neon_Op (v2i64 VPR128:$src), (v2i64 VPR128:$Rn))),
8378 (v2i64 (!cast<Instruction>(Prefix # 2d)
8379 (v2i64 VPR128:$src), (v2i64 VPR128:$Rn)))>;
8381 def : Pat<(v8i8 (Neon_Op (v8i8 VPR64:$src), (v8i8 VPR64:$Rn))),
8382 (v8i8 (!cast<Instruction>(Prefix # 8b)
8383 (v8i8 VPR64:$src), (v8i8 VPR64:$Rn)))>;
8385 def : Pat<(v4i16 (Neon_Op (v4i16 VPR64:$src), (v4i16 VPR64:$Rn))),
8386 (v4i16 (!cast<Instruction>(Prefix # 4h)
8387 (v4i16 VPR64:$src), (v4i16 VPR64:$Rn)))>;
8389 def : Pat<(v2i32 (Neon_Op (v2i32 VPR64:$src), (v2i32 VPR64:$Rn))),
8390 (v2i32 (!cast<Instruction>(Prefix # 2s)
8391 (v2i32 VPR64:$src), (v2i32 VPR64:$Rn)))>;
8394 defm : NeonI_2VMisc_BHSD_2Args_Pattern<"SUQADD", int_aarch64_neon_suqadd>;
8395 defm : NeonI_2VMisc_BHSD_2Args_Pattern<"USQADD", int_aarch64_neon_usqadd>;
// One-operand misc instructions over B/H/S element sizes only (no D),
// with the DAG pattern inlined — used for CLS/CLZ. Opcode fixed at 0b00100.
8397 multiclass NeonI_2VMisc_BHSsizes<string asmop, bit U,
8398 SDPatternOperator Neon_Op> {
8399 def 16b : NeonI_2VMisc<0b1, U, 0b00, 0b00100,
8400 (outs VPR128:$Rd), (ins VPR128:$Rn),
8401 asmop # "\t$Rd.16b, $Rn.16b",
8402 [(set (v16i8 VPR128:$Rd),
8403 (v16i8 (Neon_Op (v16i8 VPR128:$Rn))))],
8406 def 8h : NeonI_2VMisc<0b1, U, 0b01, 0b00100,
8407 (outs VPR128:$Rd), (ins VPR128:$Rn),
8408 asmop # "\t$Rd.8h, $Rn.8h",
8409 [(set (v8i16 VPR128:$Rd),
8410 (v8i16 (Neon_Op (v8i16 VPR128:$Rn))))],
8413 def 4s : NeonI_2VMisc<0b1, U, 0b10, 0b00100,
8414 (outs VPR128:$Rd), (ins VPR128:$Rn),
8415 asmop # "\t$Rd.4s, $Rn.4s",
8416 [(set (v4i32 VPR128:$Rd),
8417 (v4i32 (Neon_Op (v4i32 VPR128:$Rn))))],
8420 def 8b : NeonI_2VMisc<0b0, U, 0b00, 0b00100,
8421 (outs VPR64:$Rd), (ins VPR64:$Rn),
8422 asmop # "\t$Rd.8b, $Rn.8b",
8423 [(set (v8i8 VPR64:$Rd),
8424 (v8i8 (Neon_Op (v8i8 VPR64:$Rn))))],
8427 def 4h : NeonI_2VMisc<0b0, U, 0b01, 0b00100,
8428 (outs VPR64:$Rd), (ins VPR64:$Rn),
8429 asmop # "\t$Rd.4h, $Rn.4h",
8430 [(set (v4i16 VPR64:$Rd),
8431 (v4i16 (Neon_Op (v4i16 VPR64:$Rn))))],
8434 def 2s : NeonI_2VMisc<0b0, U, 0b10, 0b00100,
8435 (outs VPR64:$Rd), (ins VPR64:$Rn),
8436 asmop # "\t$Rd.2s, $Rn.2s",
8437 [(set (v2i32 VPR64:$Rd),
8438 (v2i32 (Neon_Op (v2i32 VPR64:$Rn))))],
// Count leading sign bits / count leading zeros.
8442 defm CLS : NeonI_2VMisc_BHSsizes<"cls", 0b0, int_arm_neon_vcls>;
8443 defm CLZ : NeonI_2VMisc_BHSsizes<"clz", 0b1, ctlz>;
// Byte-only one-operand misc instructions (16b and 8b forms) — used for
// CNT/NOT/RBIT; patterns for them are attached separately below.
8445 multiclass NeonI_2VMisc_Bsize<string asmop, bit U, bits<2> size,
8447 def 16b : NeonI_2VMisc<0b1, U, size, Opcode,
8448 (outs VPR128:$Rd), (ins VPR128:$Rn),
8449 asmop # "\t$Rd.16b, $Rn.16b",
8452 def 8b : NeonI_2VMisc<0b0, U, size, Opcode,
8453 (outs VPR64:$Rd), (ins VPR64:$Rn),
8454 asmop # "\t$Rd.8b, $Rn.8b",
8458 defm CNT : NeonI_2VMisc_Bsize<"cnt", 0b0, 0b00, 0b00101>;
8459 defm NOT : NeonI_2VMisc_Bsize<"not", 0b1, 0b00, 0b00101>;
8460 defm RBIT : NeonI_2VMisc_Bsize<"rbit", 0b1, 0b01, 0b00101>;
// `mvn` is the conventional alias for `not`; the trailing 0 marks these as
// non-preferred for printing.
8462 def : NeonInstAlias<"mvn $Rd.16b, $Rn.16b",
8463 (NOT16b VPR128:$Rd, VPR128:$Rn), 0>;
8464 def : NeonInstAlias<"mvn $Rd.8b, $Rn.8b",
8465 (NOT8b VPR64:$Rd, VPR64:$Rn), 0>;
// Population count maps to CNT on byte vectors.
8467 def : Pat<(v16i8 (ctpop (v16i8 VPR128:$Rn))),
8468 (v16i8 (CNT16b (v16i8 VPR128:$Rn)))>;
8469 def : Pat<(v8i8 (ctpop (v8i8 VPR64:$Rn))),
8470 (v8i8 (CNT8b (v8i8 VPR64:$Rn)))>;
// Bitwise NOT: (xor x, all-ones) selects NOT16b/NOT8b; wider element types
// see the all-ones splat through a bitconvert and reuse the byte-wide NOT
// since the operation is bit-level.
8472 def : Pat<(v16i8 (xor
8474 (v16i8 Neon_AllOne))),
8475 (v16i8 (NOT16b (v16i8 VPR128:$Rn)))>;
8476 def : Pat<(v8i8 (xor
8478 (v8i8 Neon_AllOne))),
8479 (v8i8 (NOT8b (v8i8 VPR64:$Rn)))>;
8480 def : Pat<(v8i16 (xor
8482 (v8i16 (bitconvert (v16i8 Neon_AllOne))))),
8483 (NOT16b VPR128:$Rn)>;
8484 def : Pat<(v4i16 (xor
8486 (v4i16 (bitconvert (v8i8 Neon_AllOne))))),
8488 def : Pat<(v4i32 (xor
8490 (v4i32 (bitconvert (v16i8 Neon_AllOne))))),
8491 (NOT16b VPR128:$Rn)>;
8492 def : Pat<(v2i32 (xor
8494 (v2i32 (bitconvert (v8i8 Neon_AllOne))))),
8496 def : Pat<(v2i64 (xor
8498 (v2i64 (bitconvert (v16i8 Neon_AllOne))))),
8499 (NOT16b VPR128:$Rn)>;
// Bit reversal intrinsic maps to RBIT.
8501 def : Pat<(v16i8 (int_aarch64_neon_rbit (v16i8 VPR128:$Rn))),
8502 (v16i8 (RBIT16b (v16i8 VPR128:$Rn)))>;
8503 def : Pat<(v8i8 (int_aarch64_neon_rbit (v8i8 VPR64:$Rn))),
8504 (v8i8 (RBIT8b (v8i8 VPR64:$Rn)))>;
// One-operand floating-point misc instructions over S and D element sizes
// (4s/2d/2s), pattern inlined — used for FABS/FNEG.
8506 multiclass NeonI_2VMisc_SDsizes<string asmop, bit U, bits<5> opcode,
8507 SDPatternOperator Neon_Op> {
8508 def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
8509 (outs VPR128:$Rd), (ins VPR128:$Rn),
8510 asmop # "\t$Rd.4s, $Rn.4s",
8511 [(set (v4f32 VPR128:$Rd),
8512 (v4f32 (Neon_Op (v4f32 VPR128:$Rn))))],
8515 def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode,
8516 (outs VPR128:$Rd), (ins VPR128:$Rn),
8517 asmop # "\t$Rd.2d, $Rn.2d",
8518 [(set (v2f64 VPR128:$Rd),
8519 (v2f64 (Neon_Op (v2f64 VPR128:$Rn))))],
8522 def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
8523 (outs VPR64:$Rd), (ins VPR64:$Rn),
8524 asmop # "\t$Rd.2s, $Rn.2s",
8525 [(set (v2f32 VPR64:$Rd),
8526 (v2f32 (Neon_Op (v2f32 VPR64:$Rn))))],
8530 defm FABS : NeonI_2VMisc_SDsizes<"fabs", 0b0, 0b01111, fabs>;
8531 defm FNEG : NeonI_2VMisc_SDsizes<"fneg", 0b1, 0b01111, fneg>;
// Narrowing instructions H/S/D -> B/H/S: plain forms write a 64-bit result;
// the "*2" forms write the high half of a 128-bit register, tying $Rd to
// $src so the low half is preserved.
8533 multiclass NeonI_2VMisc_HSD_Narrow<string asmop, bit U, bits<5> opcode> {
8534 def 8h8b : NeonI_2VMisc<0b0, U, 0b00, opcode,
8535 (outs VPR64:$Rd), (ins VPR128:$Rn),
8536 asmop # "\t$Rd.8b, $Rn.8h",
8539 def 4s4h : NeonI_2VMisc<0b0, U, 0b01, opcode,
8540 (outs VPR64:$Rd), (ins VPR128:$Rn),
8541 asmop # "\t$Rd.4h, $Rn.4s",
8544 def 2d2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
8545 (outs VPR64:$Rd), (ins VPR128:$Rn),
8546 asmop # "\t$Rd.2s, $Rn.2d",
8549 let Constraints = "$Rd = $src" in {
8550 def 8h16b : NeonI_2VMisc<0b1, U, 0b00, opcode,
8551 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
8552 asmop # "2\t$Rd.16b, $Rn.8h",
8555 def 4s8h : NeonI_2VMisc<0b1, U, 0b01, opcode,
8556 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
8557 asmop # "2\t$Rd.8h, $Rn.4s",
8560 def 2d4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
8561 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
8562 asmop # "2\t$Rd.4s, $Rn.2d",
// Plain, saturating-unsigned, saturating-signed, saturating-unsigned-source
// extract-narrow instructions.
8567 defm XTN : NeonI_2VMisc_HSD_Narrow<"xtn", 0b0, 0b10010>;
8568 defm SQXTUN : NeonI_2VMisc_HSD_Narrow<"sqxtun", 0b1, 0b10010>;
8569 defm SQXTN : NeonI_2VMisc_HSD_Narrow<"sqxtn", 0b0, 0b10100>;
8570 defm UQXTN : NeonI_2VMisc_HSD_Narrow<"uqxtn", 0b1, 0b10100>;
// Selection patterns for the narrowing instructions: the plain forms match
// Neon_Op directly; the concat_vectors forms match "narrow into high half"
// and widen the existing low half ($src) via SUBREG_TO_REG.
// NOTE(review): sampled view — the `(v8i8 VPR64:$src)` low-half operand
// lines of the concat patterns are not visible here.
8572 multiclass NeonI_2VMisc_Narrow_Patterns<string Prefix,
8573 SDPatternOperator Neon_Op> {
8574 def : Pat<(v8i8 (Neon_Op (v8i16 VPR128:$Rn))),
8575 (v8i8 (!cast<Instruction>(Prefix # 8h8b) (v8i16 VPR128:$Rn)))>;
8577 def : Pat<(v4i16 (Neon_Op (v4i32 VPR128:$Rn))),
8578 (v4i16 (!cast<Instruction>(Prefix # 4s4h) (v4i32 VPR128:$Rn)))>;
8580 def : Pat<(v2i32 (Neon_Op (v2i64 VPR128:$Rn))),
8581 (v2i32 (!cast<Instruction>(Prefix # 2d2s) (v2i64 VPR128:$Rn)))>;
8583 def : Pat<(v16i8 (concat_vectors
8585 (v8i8 (Neon_Op (v8i16 VPR128:$Rn))))),
8586 (!cast<Instruction>(Prefix # 8h16b)
8587 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64),
8590 def : Pat<(v8i16 (concat_vectors
8592 (v4i16 (Neon_Op (v4i32 VPR128:$Rn))))),
8593 (!cast<Instruction>(Prefix # 4s8h)
8594 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64),
8597 def : Pat<(v4i32 (concat_vectors
8599 (v2i32 (Neon_Op (v2i64 VPR128:$Rn))))),
8600 (!cast<Instruction>(Prefix # 2d4s)
8601 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64),
// XTN is plain truncation; the others use the ARM NEON saturating-narrow
// intrinsics.
8605 defm : NeonI_2VMisc_Narrow_Patterns<"XTN", trunc>;
8606 defm : NeonI_2VMisc_Narrow_Patterns<"SQXTUN", int_arm_neon_vqmovnsu>;
8607 defm : NeonI_2VMisc_Narrow_Patterns<"SQXTN", int_arm_neon_vqmovns>;
8608 defm : NeonI_2VMisc_Narrow_Patterns<"UQXTN", int_arm_neon_vqmovnu>;
// SHLL — shift-left-long by a fixed amount equal to the source element
// width (8/16/32, enforced by the uimm_exact* operands). A custom decoder
// is needed because the shift amount is implied by the element size.
// NOTE(review): sampled view — each def's `(outs ...)` line is not
// visible here.
8610 multiclass NeonI_2VMisc_SHIFT<string asmop, bit U, bits<5> opcode> {
8611 let DecoderMethod = "DecodeSHLLInstruction" in {
8612 def 8b8h : NeonI_2VMisc<0b0, U, 0b00, opcode,
8614 (ins VPR64:$Rn, uimm_exact8:$Imm),
8615 asmop # "\t$Rd.8h, $Rn.8b, $Imm",
8618 def 4h4s : NeonI_2VMisc<0b0, U, 0b01, opcode,
8620 (ins VPR64:$Rn, uimm_exact16:$Imm),
8621 asmop # "\t$Rd.4s, $Rn.4h, $Imm",
8624 def 2s2d : NeonI_2VMisc<0b0, U, 0b10, opcode,
8626 (ins VPR64:$Rn, uimm_exact32:$Imm),
8627 asmop # "\t$Rd.2d, $Rn.2s, $Imm",
8630 def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
8632 (ins VPR128:$Rn, uimm_exact8:$Imm),
8633 asmop # "2\t$Rd.8h, $Rn.16b, $Imm",
8636 def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
8638 (ins VPR128:$Rn, uimm_exact16:$Imm),
8639 asmop # "2\t$Rd.4s, $Rn.8h, $Imm",
8642 def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
8644 (ins VPR128:$Rn, uimm_exact32:$Imm),
8645 asmop # "2\t$Rd.2d, $Rn.4s, $Imm",
8650 defm SHLL : NeonI_2VMisc_SHIFT<"shll", 0b1, 0b10011>;
// Select SHLL for (extend(x) << elt-width): ExtOp is zext or sext (the
// instruction's result is identical for both since the shift discards the
// extension bits).
// NOTE(review): sampled view — the Pat opener and the shl/Neon_vdup wrapper
// lines of these two classes are not visible here.
8652 class NeonI_SHLL_Patterns<ValueType OpTy, ValueType DesTy,
8653 SDPatternOperator ExtOp, Operand Neon_Imm,
8656 (DesTy (ExtOp (OpTy VPR64:$Rn))),
8658 (i32 Neon_Imm:$Imm))))),
8659 (!cast<Instruction>("SHLL" # suffix) VPR64:$Rn, Neon_Imm:$Imm)>;
// High-half variant: the source is the upper half of a 128-bit register,
// extracted by GetHigh, selecting the "shll2"-suffixed instruction.
8661 class NeonI_SHLL_High_Patterns<ValueType OpTy, ValueType DesTy,
8662 SDPatternOperator ExtOp, Operand Neon_Imm,
8663 string suffix, PatFrag GetHigh>
8666 (OpTy (GetHigh VPR128:$Rn)))),
8668 (i32 Neon_Imm:$Imm))))),
8669 (!cast<Instruction>("SHLL" # suffix) VPR128:$Rn, Neon_Imm:$Imm)>;
// Instantiations for both extension kinds at each element size.
8671 def : NeonI_SHLL_Patterns<v8i8, v8i16, zext, uimm_exact8, "8b8h">;
8672 def : NeonI_SHLL_Patterns<v8i8, v8i16, sext, uimm_exact8, "8b8h">;
8673 def : NeonI_SHLL_Patterns<v4i16, v4i32, zext, uimm_exact16, "4h4s">;
8674 def : NeonI_SHLL_Patterns<v4i16, v4i32, sext, uimm_exact16, "4h4s">;
8675 def : NeonI_SHLL_Patterns<v2i32, v2i64, zext, uimm_exact32, "2s2d">;
8676 def : NeonI_SHLL_Patterns<v2i32, v2i64, sext, uimm_exact32, "2s2d">;
8677 def : NeonI_SHLL_High_Patterns<v8i8, v8i16, zext, uimm_exact8, "16b8h",
8679 def : NeonI_SHLL_High_Patterns<v8i8, v8i16, sext, uimm_exact8, "16b8h",
8681 def : NeonI_SHLL_High_Patterns<v4i16, v4i32, zext, uimm_exact16, "8h4s",
8683 def : NeonI_SHLL_High_Patterns<v4i16, v4i32, sext, uimm_exact16, "8h4s",
8685 def : NeonI_SHLL_High_Patterns<v2i32, v2i64, zext, uimm_exact32, "4s2d",
8687 def : NeonI_SHLL_High_Patterns<v2i32, v2i64, sext, uimm_exact32, "4s2d",
// Floating-point narrowing (FCVTN): S->H and D->S, with tied-register "*2"
// forms writing the high half of the destination.
8690 multiclass NeonI_2VMisc_SD_Narrow<string asmop, bit U, bits<5> opcode> {
8691 def 4s4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
8692 (outs VPR64:$Rd), (ins VPR128:$Rn),
8693 asmop # "\t$Rd.4h, $Rn.4s",
8696 def 2d2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
8697 (outs VPR64:$Rd), (ins VPR128:$Rn),
8698 asmop # "\t$Rd.2s, $Rn.2d",
8701 let Constraints = "$src = $Rd" in {
8702 def 4s8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
8703 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
8704 asmop # "2\t$Rd.8h, $Rn.4s",
8707 def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
8708 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
8709 asmop # "2\t$Rd.4s, $Rn.2d",
8714 defm FCVTN : NeonI_2VMisc_SD_Narrow<"fcvtn", 0b0, 0b10110>;
// Selection patterns for FP narrowing: f32->f16 uses the half-float
// conversion operator, f64->f32 uses rounding; the concat forms keep the
// existing low half via SUBREG_TO_REG.
// NOTE(review): sampled view — the low-half `$src` operand lines of the
// concat_vectors patterns are not visible here.
8716 multiclass NeonI_2VMisc_Narrow_Pattern<string prefix,
8717 SDPatternOperator f32_to_f16_Op,
8718 SDPatternOperator f64_to_f32_Op> {
8720 def : Pat<(v4i16 (f32_to_f16_Op (v4f32 VPR128:$Rn))),
8721 (!cast<Instruction>(prefix # "4s4h") (v4f32 VPR128:$Rn))>;
8723 def : Pat<(v8i16 (concat_vectors
8725 (v4i16 (f32_to_f16_Op (v4f32 VPR128:$Rn))))),
8726 (!cast<Instruction>(prefix # "4s8h")
8727 (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)),
8728 (v4f32 VPR128:$Rn))>;
8730 def : Pat<(v2f32 (f64_to_f32_Op (v2f64 VPR128:$Rn))),
8731 (!cast<Instruction>(prefix # "2d2s") (v2f64 VPR128:$Rn))>;
8733 def : Pat<(v4f32 (concat_vectors
8735 (v2f32 (f64_to_f32_Op (v2f64 VPR128:$Rn))))),
8736 (!cast<Instruction>(prefix # "2d4s")
8737 (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)),
8738 (v2f64 VPR128:$Rn))>;
8741 defm : NeonI_2VMisc_Narrow_Pattern<"FCVTN", int_arm_neon_vcvtfp2hf, fround>;
// Double-to-single narrowing instantiated as FCVTXN (selected from the
// int_aarch64_neon_vcvtxn intrinsic). Only 64-bit source elements exist here,
// so there are just the 2d2s (low-half) and tied-destination 2d4s (high-half)
// forms, plus the matching patterns.
8743 multiclass NeonI_2VMisc_D_Narrow<string asmop, string prefix, bit U,
8745 def 2d2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
8746 (outs VPR64:$Rd), (ins VPR128:$Rn),
8747 asmop # "\t$Rd.2s, $Rn.2d",
8750 def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
8751 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
8752 asmop # "2\t$Rd.4s, $Rn.2d",
// High-half form preserves the low half supplied in $src.
8754 let Constraints = "$src = $Rd";
8757 def : Pat<(v2f32 (int_aarch64_neon_vcvtxn (v2f64 VPR128:$Rn))),
8758 (!cast<Instruction>(prefix # "2d2s") VPR128:$Rn)>;
8760 def : Pat<(v4f32 (concat_vectors
8762 (v2f32 (int_aarch64_neon_vcvtxn (v2f64 VPR128:$Rn))))),
8763 (!cast<Instruction>(prefix # "2d4s")
8764 (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)),
8768 defm FCVTXN : NeonI_2VMisc_D_Narrow<"fcvtxn","FCVTXN", 0b1, 0b10110>;
// Matches the upper two lanes (elements 2..3) of a v4f32, i.e. an
// extract_subvector starting at index 2, yielding a v2f32.
8770 def Neon_High4Float : PatFrag<(ops node:$in),
8771 (extract_subvector (v4f32 node:$in), (iPTR 2))>;
// Floating-point lengthening conversions (instantiated below as FCVTL).
// Low-half forms (4h4s, 2s2d) read a 64-bit source; the "2" forms (8h4s,
// 4s2d) read the upper half of a 128-bit source. All write a full 128-bit
// destination, so no tied operand is needed.
8773 multiclass NeonI_2VMisc_HS_Extend<string asmop, bit U, bits<5> opcode> {
8774 def 4h4s : NeonI_2VMisc<0b0, U, 0b00, opcode,
8775 (outs VPR128:$Rd), (ins VPR64:$Rn),
8776 asmop # "\t$Rd.4s, $Rn.4h",
8779 def 2s2d : NeonI_2VMisc<0b0, U, 0b01, opcode,
8780 (outs VPR128:$Rd), (ins VPR64:$Rn),
8781 asmop # "\t$Rd.2d, $Rn.2s",
8784 def 8h4s : NeonI_2VMisc<0b1, U, 0b00, opcode,
8785 (outs VPR128:$Rd), (ins VPR128:$Rn),
8786 asmop # "2\t$Rd.4s, $Rn.8h",
8789 def 4s2d : NeonI_2VMisc<0b1, U, 0b01, opcode,
8790 (outs VPR128:$Rd), (ins VPR128:$Rn),
8791 asmop # "2\t$Rd.2d, $Rn.4s",
8795 defm FCVTL : NeonI_2VMisc_HS_Extend<"fcvtl", 0b0, 0b10111>;
// Selection patterns for the lengthening conversions: half->single via the
// ARM-shared vcvthf2fp intrinsic, single->double via fextend. The high-half
// variants match an extend of the upper lanes (Neon_High4Float for v4f32).
8797 multiclass NeonI_2VMisc_Extend_Pattern<string prefix> {
8798 def : Pat<(v4f32 (int_arm_neon_vcvthf2fp (v4i16 VPR64:$Rn))),
8799 (!cast<Instruction>(prefix # "4h4s") VPR64:$Rn)>;
8801 def : Pat<(v4f32 (int_arm_neon_vcvthf2fp
8803 (v8i16 VPR128:$Rn))))),
8804 (!cast<Instruction>(prefix # "8h4s") VPR128:$Rn)>;
8806 def : Pat<(v2f64 (fextend (v2f32 VPR64:$Rn))),
8807 (!cast<Instruction>(prefix # "2s2d") VPR64:$Rn)>;
8809 def : Pat<(v2f64 (fextend
8810 (v2f32 (Neon_High4Float
8811 (v4f32 VPR128:$Rn))))),
8812 (!cast<Instruction>(prefix # "4s2d") VPR128:$Rn)>;
8815 defm : NeonI_2VMisc_Extend_Pattern<"FCVTL">;
// Generic same-width two-operand conversion shapes: one 4s, one 2d and one
// 2s form, each selecting Neon_Op on the matching operand/result types. The
// Size bit is folded into the instruction's size field ({Size, 0b0/0b1}).
// Used below for fp->int, int->fp and fp->fp (rounding etc.) conversions.
8817 multiclass NeonI_2VMisc_SD_Conv<string asmop, bit Size, bit U, bits<5> opcode,
8818 ValueType ResTy4s, ValueType OpTy4s,
8819 ValueType ResTy2d, ValueType OpTy2d,
8820 ValueType ResTy2s, ValueType OpTy2s,
8821 SDPatternOperator Neon_Op> {
8823 def 4s : NeonI_2VMisc<0b1, U, {Size, 0b0}, opcode,
8824 (outs VPR128:$Rd), (ins VPR128:$Rn),
8825 asmop # "\t$Rd.4s, $Rn.4s",
8826 [(set (ResTy4s VPR128:$Rd),
8827 (ResTy4s (Neon_Op (OpTy4s VPR128:$Rn))))],
8830 def 2d : NeonI_2VMisc<0b1, U, {Size, 0b1}, opcode,
8831 (outs VPR128:$Rd), (ins VPR128:$Rn),
8832 asmop # "\t$Rd.2d, $Rn.2d",
8833 [(set (ResTy2d VPR128:$Rd),
8834 (ResTy2d (Neon_Op (OpTy2d VPR128:$Rn))))],
8837 def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode,
8838 (outs VPR64:$Rd), (ins VPR64:$Rn),
8839 asmop # "\t$Rd.2s, $Rn.2s",
8840 [(set (ResTy2s VPR64:$Rd),
8841 (ResTy2s (Neon_Op (OpTy2s VPR64:$Rn))))],
// Float-to-integer conversions: SD_Conv with integer results and FP sources.
// The trailing letter pairs encode rounding mode and signedness: N=to
// nearest (ties even), P=toward +inf, M=toward -inf, Z=toward zero,
// A=to nearest (ties away); S=signed, U=unsigned result.
8845 multiclass NeonI_2VMisc_fp_to_int<string asmop, bit Size, bit U,
8846 bits<5> opcode, SDPatternOperator Neon_Op> {
8847 defm _ : NeonI_2VMisc_SD_Conv<asmop, Size, U, opcode, v4i32, v4f32, v2i64,
8848 v2f64, v2i32, v2f32, Neon_Op>;
8851 defm FCVTNS : NeonI_2VMisc_fp_to_int<"fcvtns", 0b0, 0b0, 0b11010,
8852 int_arm_neon_vcvtns>;
8853 defm FCVTNU : NeonI_2VMisc_fp_to_int<"fcvtnu", 0b0, 0b1, 0b11010,
8854 int_arm_neon_vcvtnu>;
8855 defm FCVTPS : NeonI_2VMisc_fp_to_int<"fcvtps", 0b1, 0b0, 0b11010,
8856 int_arm_neon_vcvtps>;
8857 defm FCVTPU : NeonI_2VMisc_fp_to_int<"fcvtpu", 0b1, 0b1, 0b11010,
8858 int_arm_neon_vcvtpu>;
8859 defm FCVTMS : NeonI_2VMisc_fp_to_int<"fcvtms", 0b0, 0b0, 0b11011,
8860 int_arm_neon_vcvtms>;
8861 defm FCVTMU : NeonI_2VMisc_fp_to_int<"fcvtmu", 0b0, 0b1, 0b11011,
8862 int_arm_neon_vcvtmu>;
// Truncating (round-toward-zero) conversions use the generic ISD nodes.
8863 defm FCVTZS : NeonI_2VMisc_fp_to_int<"fcvtzs", 0b1, 0b0, 0b11011, fp_to_sint>;
8864 defm FCVTZU : NeonI_2VMisc_fp_to_int<"fcvtzu", 0b1, 0b1, 0b11011, fp_to_uint>;
8865 defm FCVTAS : NeonI_2VMisc_fp_to_int<"fcvtas", 0b0, 0b0, 0b11100,
8866 int_arm_neon_vcvtas>;
8867 defm FCVTAU : NeonI_2VMisc_fp_to_int<"fcvtau", 0b0, 0b1, 0b11100,
8868 int_arm_neon_vcvtau>;
// Integer-to-float conversions: SD_Conv with FP results and integer sources.
// SCVTF converts signed, UCVTF unsigned integers.
8870 multiclass NeonI_2VMisc_int_to_fp<string asmop, bit Size, bit U,
8871 bits<5> opcode, SDPatternOperator Neon_Op> {
8872 defm _ : NeonI_2VMisc_SD_Conv<asmop, Size, U, opcode, v4f32, v4i32, v2f64,
8873 v2i64, v2f32, v2i32, Neon_Op>;
8876 defm SCVTF : NeonI_2VMisc_int_to_fp<"scvtf", 0b0, 0b0, 0b11101, sint_to_fp>;
8877 defm UCVTF : NeonI_2VMisc_int_to_fp<"ucvtf", 0b0, 0b1, 0b11101, uint_to_fp>;
// Float-to-float unary ops on same-width vectors: the FRINT rounding family,
// reciprocal/reciprocal-sqrt estimates, and square root.
8879 multiclass NeonI_2VMisc_fp_to_fp<string asmop, bit Size, bit U,
8880 bits<5> opcode, SDPatternOperator Neon_Op> {
8881 defm _ : NeonI_2VMisc_SD_Conv<asmop, Size, U, opcode, v4f32, v4f32, v2f64,
8882 v2f64, v2f32, v2f32, Neon_Op>;
8885 defm FRINTN : NeonI_2VMisc_fp_to_fp<"frintn", 0b0, 0b0, 0b11000,
8886 int_aarch64_neon_frintn>;
// frinta: round to nearest, ties away (frnd); frintp/frintm/frintz map to
// ceil/floor/trunc; frintx raises Inexact (frint); frinti uses current mode.
8887 defm FRINTA : NeonI_2VMisc_fp_to_fp<"frinta", 0b0, 0b1, 0b11000, frnd>;
8888 defm FRINTP : NeonI_2VMisc_fp_to_fp<"frintp", 0b1, 0b0, 0b11000, fceil>;
8889 defm FRINTM : NeonI_2VMisc_fp_to_fp<"frintm", 0b0, 0b0, 0b11001, ffloor>;
8890 defm FRINTX : NeonI_2VMisc_fp_to_fp<"frintx", 0b0, 0b1, 0b11001, frint>;
8891 defm FRINTZ : NeonI_2VMisc_fp_to_fp<"frintz", 0b1, 0b0, 0b11001, ftrunc>;
8892 defm FRINTI : NeonI_2VMisc_fp_to_fp<"frinti", 0b1, 0b1, 0b11001, fnearbyint>;
8893 defm FRECPE : NeonI_2VMisc_fp_to_fp<"frecpe", 0b1, 0b0, 0b11101,
8894 int_arm_neon_vrecpe>;
8895 defm FRSQRTE : NeonI_2VMisc_fp_to_fp<"frsqrte", 0b1, 0b1, 0b11101,
8896 int_arm_neon_vrsqrte>;
8897 defm FSQRT : NeonI_2VMisc_fp_to_fp<"fsqrt", 0b1, 0b1, 0b11111, fsqrt>;
// Unsigned integer estimate ops, 32-bit elements only (no 2d form): URECPE
// and URSQRTE, sharing the ARM reciprocal-estimate intrinsics.
8899 multiclass NeonI_2VMisc_S_Conv<string asmop, bit Size, bit U,
8900 bits<5> opcode, SDPatternOperator Neon_Op> {
8901 def 4s : NeonI_2VMisc<0b1, U, {Size, 0b0}, opcode,
8902 (outs VPR128:$Rd), (ins VPR128:$Rn),
8903 asmop # "\t$Rd.4s, $Rn.4s",
8904 [(set (v4i32 VPR128:$Rd),
8905 (v4i32 (Neon_Op (v4i32 VPR128:$Rn))))],
8908 def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode,
8909 (outs VPR64:$Rd), (ins VPR64:$Rn),
8910 asmop # "\t$Rd.2s, $Rn.2s",
8911 [(set (v2i32 VPR64:$Rd),
8912 (v2i32 (Neon_Op (v2i32 VPR64:$Rn))))],
8916 defm URECPE : NeonI_2VMisc_S_Conv<"urecpe", 0b1, 0b0, 0b11100,
8917 int_arm_neon_vrecpe>;
8918 defm URSQRTE : NeonI_2VMisc_S_Conv<"ursqrte", 0b1, 0b1, 0b11100,
8919 int_arm_neon_vrsqrte>;
// Two-operand AES rounds (AESE/AESD): v16i8 state in $src combined with the
// round key in $Rn; destructive, so $src is tied to $Rd. Requires the Crypto
// extension in addition to NEON.
8922 class NeonI_Cryptoaes_2v<bits<2> size, bits<5> opcode,
8923 string asmop, SDPatternOperator opnode>
8924 : NeonI_Crypto_AES<size, opcode,
8925 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
8926 asmop # "\t$Rd.16b, $Rn.16b",
8927 [(set (v16i8 VPR128:$Rd),
8928 (v16i8 (opnode (v16i8 VPR128:$src),
8929 (v16i8 VPR128:$Rn))))],
8931 let Constraints = "$src = $Rd";
8932 let Predicates = [HasNEON, HasCrypto];
8935 def AESE : NeonI_Cryptoaes_2v<0b00, 0b00100, "aese", int_arm_neon_aese>;
8936 def AESD : NeonI_Cryptoaes_2v<0b00, 0b00101, "aesd", int_arm_neon_aesd>;
// One-operand AES mix-columns steps (AESMC/AESIMC): pure v16i8 -> v16i8
// transforms with no tied operand.
8938 class NeonI_Cryptoaes<bits<2> size, bits<5> opcode,
8939 string asmop, SDPatternOperator opnode>
8940 : NeonI_Crypto_AES<size, opcode,
8941 (outs VPR128:$Rd), (ins VPR128:$Rn),
8942 asmop # "\t$Rd.16b, $Rn.16b",
8943 [(set (v16i8 VPR128:$Rd),
8944 (v16i8 (opnode (v16i8 VPR128:$Rn))))],
8947 def AESMC : NeonI_Cryptoaes<0b00, 0b00110, "aesmc", int_arm_neon_aesmc>;
8948 def AESIMC : NeonI_Cryptoaes<0b00, 0b00111, "aesimc", int_arm_neon_aesimc>;
// Two-vector SHA schedule-update instructions (SHA1SU1/SHA256SU0): v4i32
// accumulator in $src (tied to $Rd) combined with $Rn.
8950 class NeonI_Cryptosha_vv<bits<2> size, bits<5> opcode,
8951 string asmop, SDPatternOperator opnode>
8952 : NeonI_Crypto_SHA<size, opcode,
8953 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
8954 asmop # "\t$Rd.4s, $Rn.4s",
8955 [(set (v4i32 VPR128:$Rd),
8956 (v4i32 (opnode (v4i32 VPR128:$src),
8957 (v4i32 VPR128:$Rn))))],
8959 let Constraints = "$src = $Rd";
8960 let Predicates = [HasNEON, HasCrypto];
8963 def SHA1SU1 : NeonI_Cryptosha_vv<0b00, 0b00001, "sha1su1",
8964 int_arm_neon_sha1su1>;
8965 def SHA256SU0 : NeonI_Cryptosha_vv<0b00, 0b00010, "sha256su0",
8966 int_arm_neon_sha256su0>;
// Scalar SHA1H (fixed-rotate): operates on 32-bit FP registers. The intrinsic
// uses i32 values, so the pattern below moves the operand into FPR32 and the
// result back to GPR32 with COPY_TO_REGCLASS.
8968 class NeonI_Cryptosha_ss<bits<2> size, bits<5> opcode,
8969 string asmop, SDPatternOperator opnode>
8970 : NeonI_Crypto_SHA<size, opcode,
8971 (outs FPR32:$Rd), (ins FPR32:$Rn),
8972 asmop # "\t$Rd, $Rn",
8974 let Predicates = [HasNEON, HasCrypto];
8975 let hasSideEffects = 0;
8978 def SHA1H : NeonI_Cryptosha_ss<0b00, 0b00000, "sha1h", int_arm_neon_sha1h>;
8979 def : Pat<(i32 (int_arm_neon_sha1h i32:$Rn)),
8980 (COPY_TO_REGCLASS (SHA1H (COPY_TO_REGCLASS i32:$Rn, FPR32)), GPR32)>;
// Three-vector SHA schedule-update instructions (SHA1SU0/SHA256SU1): v4i32
// accumulator $src (tied to $Rd) combined with $Rn and $Rm.
8983 class NeonI_Cryptosha3_vvv<bits<2> size, bits<3> opcode, string asmop,
8984 SDPatternOperator opnode>
8985 : NeonI_Crypto_3VSHA<size, opcode,
8987 (ins VPR128:$src, VPR128:$Rn, VPR128:$Rm),
8988 asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
8989 [(set (v4i32 VPR128:$Rd),
8990 (v4i32 (opnode (v4i32 VPR128:$src),
8992 (v4i32 VPR128:$Rm))))],
8994 let Constraints = "$src = $Rd";
8995 let Predicates = [HasNEON, HasCrypto];
8998 def SHA1SU0 : NeonI_Cryptosha3_vvv<0b00, 0b011, "sha1su0",
8999 int_arm_neon_sha1su0>;
9000 def SHA256SU1 : NeonI_Cryptosha3_vvv<0b00, 0b110, "sha256su1",
9001 int_arm_neon_sha256su1>;
// SHA-256 hash-update instructions (SHA256H/SHA256H2): the two hash-state
// quadwords live in FPR128 ($src tied to $Rd, plus $Rn) with the schedule
// word in a vector register $Rm. Asm prints the Q-register operands bare and
// $Rm with a .4s arrangement.
9003 class NeonI_Cryptosha3_qqv<bits<2> size, bits<3> opcode, string asmop,
9004 SDPatternOperator opnode>
9005 : NeonI_Crypto_3VSHA<size, opcode,
9007 (ins FPR128:$src, FPR128:$Rn, VPR128:$Rm),
9008 asmop # "\t$Rd, $Rn, $Rm.4s",
9009 [(set (v4i32 FPR128:$Rd),
9010 (v4i32 (opnode (v4i32 FPR128:$src),
9012 (v4i32 VPR128:$Rm))))],
9014 let Constraints = "$src = $Rd";
9015 let Predicates = [HasNEON, HasCrypto];
9018 def SHA256H : NeonI_Cryptosha3_qqv<0b00, 0b100, "sha256h",
9019 int_arm_neon_sha256h>;
9020 def SHA256H2 : NeonI_Cryptosha3_qqv<0b00, 0b101, "sha256h2",
9021 int_arm_neon_sha256h2>;
// SHA-1 hash-update instructions (SHA1C/SHA1P/SHA1M): hash state ABCD in
// FPR128 ($src tied to $Rd), hash word E in FPR32 ($Rn), schedule in $Rm.
// No pattern on the instruction itself (hasSideEffects = 0); the Pats below
// select from the intrinsics, copying the i32 E value into FPR32 first.
9023 class NeonI_Cryptosha3_qsv<bits<2> size, bits<3> opcode, string asmop>
9024 : NeonI_Crypto_3VSHA<size, opcode,
9026 (ins FPR128:$src, FPR32:$Rn, VPR128:$Rm),
9027 asmop # "\t$Rd, $Rn, $Rm.4s",
9029 let Constraints = "$src = $Rd";
9030 let hasSideEffects = 0;
9031 let Predicates = [HasNEON, HasCrypto];
9034 def SHA1C : NeonI_Cryptosha3_qsv<0b00, 0b000, "sha1c">;
9035 def SHA1P : NeonI_Cryptosha3_qsv<0b00, 0b001, "sha1p">;
9036 def SHA1M : NeonI_Cryptosha3_qsv<0b00, 0b010, "sha1m">;
9038 def : Pat<(int_arm_neon_sha1c v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk),
9039 (SHA1C v4i32:$hash_abcd,
9040 (COPY_TO_REGCLASS i32:$hash_e, FPR32), v4i32:$wk)>;
9041 def : Pat<(int_arm_neon_sha1m v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk),
9042 (SHA1M v4i32:$hash_abcd,
9043 (COPY_TO_REGCLASS i32:$hash_e, FPR32), v4i32:$wk)>;
9044 def : Pat<(int_arm_neon_sha1p v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk),
9045 (SHA1P v4i32:$hash_abcd,
9046 (COPY_TO_REGCLASS i32:$hash_e, FPR32), v4i32:$wk)>;
9048 // Additional patterns to match shl to USHL.
// A left shift by a vector of amounts maps directly onto USHL (register-
// controlled shift); the amounts are used as-is since shl is a left shift.
9049 def : Pat<(v8i8 (shl (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))),
9050 (USHLvvv_8B $Rn, $Rm)>;
9051 def : Pat<(v4i16 (shl (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))),
9052 (USHLvvv_4H $Rn, $Rm)>;
9053 def : Pat<(v2i32 (shl (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))),
9054 (USHLvvv_2S $Rn, $Rm)>;
9055 def : Pat<(v1i64 (shl (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
9056 (USHLddd $Rn, $Rm)>;
9057 def : Pat<(v16i8 (shl (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))),
9058 (USHLvvv_16B $Rn, $Rm)>;
9059 def : Pat<(v8i16 (shl (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))),
9060 (USHLvvv_8H $Rn, $Rm)>;
9061 def : Pat<(v4i32 (shl (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))),
9062 (USHLvvv_4S $Rn, $Rm)>;
9063 def : Pat<(v2i64 (shl (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))),
9064 (USHLvvv_2D $Rn, $Rm)>;
// Single-element (v1iN) shifts: widen the scalar FPR operands into the
// smallest vector form via SUBREG_TO_REG and use the vector USHL.
9066 def : Pat<(v1i8 (shl (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))),
9068 (USHLvvv_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8),
9069 (SUBREG_TO_REG (i64 0), FPR8:$Rm, sub_8)),
9071 def : Pat<(v1i16 (shl (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
9073 (USHLvvv_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16),
9074 (SUBREG_TO_REG (i64 0), FPR16:$Rm, sub_16)),
9076 def : Pat<(v1i32 (shl (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
9078 (USHLvvv_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
9079 (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32)),
9082 // Additional patterns to match sra, srl.
9083 // For a vector right shift by vector, the shift amounts of SSHL/USHL are
9084 // negative. Negate the vector of shift amount first.
// Logical right shift (srl) -> unsigned USHL with negated amounts (NEG*).
9085 def : Pat<(v8i8 (srl (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))),
9086 (USHLvvv_8B $Rn, (NEG8b $Rm))>;
9087 def : Pat<(v4i16 (srl (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))),
9088 (USHLvvv_4H $Rn, (NEG4h $Rm))>;
9089 def : Pat<(v2i32 (srl (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))),
9090 (USHLvvv_2S $Rn, (NEG2s $Rm))>;
9091 def : Pat<(v1i64 (srl (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
9092 (USHLddd $Rn, (NEGdd $Rm))>;
9093 def : Pat<(v16i8 (srl (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))),
9094 (USHLvvv_16B $Rn, (NEG16b $Rm))>;
9095 def : Pat<(v8i16 (srl (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))),
9096 (USHLvvv_8H $Rn, (NEG8h $Rm))>;
9097 def : Pat<(v4i32 (srl (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))),
9098 (USHLvvv_4S $Rn, (NEG4s $Rm))>;
9099 def : Pat<(v2i64 (srl (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))),
9100 (USHLvvv_2D $Rn, (NEG2d $Rm))>;
// Single-element forms: widen scalar FPR operands via SUBREG_TO_REG, negate
// the amount in the vector domain, then use vector USHL.
9102 def : Pat<(v1i8 (srl (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))),
9104 (USHLvvv_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8),
9105 (NEG8b (SUBREG_TO_REG (i64 0), FPR8:$Rm, sub_8))),
9107 def : Pat<(v1i16 (srl (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
9109 (USHLvvv_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16),
9110 (NEG4h (SUBREG_TO_REG (i64 0), FPR16:$Rm, sub_16))),
9112 def : Pat<(v1i32 (srl (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
9114 (USHLvvv_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
9115 (NEG2s (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32))),
// Arithmetic right shift (sra) -> signed SSHL with negated amounts, mirroring
// the srl/USHL patterns above.
9118 def : Pat<(v8i8 (sra (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))),
9119 (SSHLvvv_8B $Rn, (NEG8b $Rm))>;
9120 def : Pat<(v4i16 (sra (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))),
9121 (SSHLvvv_4H $Rn, (NEG4h $Rm))>;
9122 def : Pat<(v2i32 (sra (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))),
9123 (SSHLvvv_2S $Rn, (NEG2s $Rm))>;
9124 def : Pat<(v1i64 (sra (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
9125 (SSHLddd $Rn, (NEGdd $Rm))>;
9126 def : Pat<(v16i8 (sra (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))),
9127 (SSHLvvv_16B $Rn, (NEG16b $Rm))>;
9128 def : Pat<(v8i16 (sra (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))),
9129 (SSHLvvv_8H $Rn, (NEG8h $Rm))>;
9130 def : Pat<(v4i32 (sra (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))),
9131 (SSHLvvv_4S $Rn, (NEG4s $Rm))>;
9132 def : Pat<(v2i64 (sra (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))),
9133 (SSHLvvv_2D $Rn, (NEG2d $Rm))>;
// Single-element forms via SUBREG_TO_REG widening, as for shl/srl above.
9135 def : Pat<(v1i8 (sra (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))),
9137 (SSHLvvv_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8),
9138 (NEG8b (SUBREG_TO_REG (i64 0), FPR8:$Rm, sub_8))),
9140 def : Pat<(v1i16 (sra (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
9142 (SSHLvvv_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16),
9143 (NEG4h (SUBREG_TO_REG (i64 0), FPR16:$Rm, sub_16))),
9145 def : Pat<(v1i32 (sra (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
9147 (SSHLvvv_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
9148 (NEG2s (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32))),
9152 // Patterns for handling half-precision values
9155 // Convert between f16 value and f32 value
// f16_to_f32: move the i32 bit pattern into an FP register (FMOVsw), take the
// low 16-bit subregister as the f16 value, then widen with FCVTsh.
// f32_to_f16: narrow with FCVThs and move the resulting bits back to GPR32.
9156 def : Pat<(f32 (f16_to_f32 (i32 GPR32:$Rn))),
9157 (FCVTsh (EXTRACT_SUBREG (FMOVsw $Rn), sub_16))>;
9158 def : Pat<(i32 (f32_to_f16 (f32 FPR32:$Rn))),
9159 (FMOVws (SUBREG_TO_REG (i64 0), (f16 (FCVThs $Rn)), sub_16))>;
9161 // Convert f16 value coming in as i16 value to f32
// The masking `and 65535` (or a matching assertzext) is redundant for the
// 16-bit subregister extraction, so both forms select the same sequence.
9162 def : Pat<(f32 (f16_to_f32 (i32 (and (i32 GPR32:$Rn), 65535)))),
9163 (FCVTsh (EXTRACT_SUBREG (FMOVsw GPR32:$Rn), sub_16))>;
9164 def : Pat<(f32 (f16_to_f32 (i32 (assertzext GPR32:$Rn)))),
9165 (FCVTsh (EXTRACT_SUBREG (FMOVsw GPR32:$Rn), sub_16))>;
9167 def : Pat<(f32 (f16_to_f32 (i32 (assertzext (i32 (
9168 f32_to_f16 (f32 FPR32:$Rn))))))),
9171 // Patterns for vector extract of half-precision FP value in i16 storage type
// Extract an f16 lane with DUPhv_H (64-bit sources widened to v8i16 first)
// and widen it to f32 with FCVTsh, avoiding a trip through the GPRs.
9172 def : Pat<(f32 (f16_to_f32 ( i32 (and (i32 (vector_extract
9173 (v4i16 VPR64:$Rn), neon_uimm2_bare:$Imm)), 65535)))),
9174 (FCVTsh (f16 (DUPhv_H
9175 (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
9176 neon_uimm2_bare:$Imm)))>;
9178 def : Pat<(f32 (f16_to_f32 ( i32 (and (i32 (vector_extract
9179 (v8i16 VPR128:$Rn), neon_uimm3_bare:$Imm)), 65535)))),
9180 (FCVTsh (f16 (DUPhv_H (v8i16 VPR128:$Rn), neon_uimm3_bare:$Imm)))>;
9182 // Patterns for vector insert of half-precision FP value 0 in i16 storage type
// Inserting the f16 value 0 (as an i16 lane): materialize zero via
// FMOVsw WZR, take its 16-bit subreg, widen to v8i16 and INSELh it into the
// target lane. The v4i16 variant works in the 128-bit domain via
// SUBREG_TO_REG and extracts the 64-bit result back out.
9183 def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn),
9184 (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 0))))))),
9185 (neon_uimm3_bare:$Imm))),
9186 (v8i16 (INSELh (v8i16 VPR128:$Rn),
9187 (v8i16 (SUBREG_TO_REG (i64 0),
9188 (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 WZR))), sub_16)),
9190 neon_uimm3_bare:$Imm, 0))>;
9192 def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn),
9193 (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 0))))))),
9194 (neon_uimm2_bare:$Imm))),
9195 (v4i16 (EXTRACT_SUBREG
9197 (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
9198 (v8i16 (SUBREG_TO_REG (i64 0),
9199 (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 WZR))), sub_16)),
9201 neon_uimm2_bare:$Imm, 0)),
9204 // Patterns for vector insert of half-precision FP value in i16 storage type
// Same shape as above but the inserted half comes from a GPR ($src, masked to
// 16 bits) instead of the zero register.
9205 def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn),
9206 (i32 (assertsext (i32 (fp_to_sint
9207 (f32 (f16_to_f32 (i32 (and (i32 GPR32:$src), 65535)))))))),
9208 (neon_uimm3_bare:$Imm))),
9209 (v8i16 (INSELh (v8i16 VPR128:$Rn),
9210 (v8i16 (SUBREG_TO_REG (i64 0),
9211 (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 GPR32:$src))), sub_16)),
9213 neon_uimm3_bare:$Imm, 0))>;
9215 def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn),
9216 (i32 (assertsext (i32 (fp_to_sint
9217 (f32 (f16_to_f32 (i32 (and (i32 GPR32:$src), 65535)))))))),
9218 (neon_uimm2_bare:$Imm))),
9219 (v4i16 (EXTRACT_SUBREG
9221 (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
9222 (v8i16 (SUBREG_TO_REG (i64 0),
9223 (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 GPR32:$src))), sub_16)),
9225 neon_uimm2_bare:$Imm, 0)),
// Lane-to-lane move of a raw i16 value: a single INSELh element copy.
9228 def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn),
9229 (i32 (vector_extract (v8i16 VPR128:$src), neon_uimm3_bare:$Imm2)),
9230 (neon_uimm3_bare:$Imm1))),
9231 (v8i16 (INSELh (v8i16 VPR128:$Rn), (v8i16 VPR128:$src),
9232 neon_uimm3_bare:$Imm1, neon_uimm3_bare:$Imm2))>;
9234 // Patterns for vector copy of half-precision FP value in i16 storage type
// Extract + convert + convert-back + insert of the same half value reduces to
// a plain element copy, so select a single INSELh.
9235 def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn),
9236 (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 (and (i32
9237 (vector_extract (v8i16 VPR128:$src), neon_uimm3_bare:$Imm2)),
9239 (neon_uimm3_bare:$Imm1))),
9240 (v8i16 (INSELh (v8i16 VPR128:$Rn), (v8i16 VPR128:$src),
9241 neon_uimm3_bare:$Imm1, neon_uimm3_bare:$Imm2))>;
9243 def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn),
9244 (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 (and (i32
9245 (vector_extract (v4i16 VPR64:$src), neon_uimm3_bare:$Imm2)),
9247 (neon_uimm3_bare:$Imm1))),
9248 (v4i16 (EXTRACT_SUBREG
9250 (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
9251 (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
9252 neon_uimm3_bare:$Imm1, neon_uimm3_bare:$Imm2)),