1 //===-- AArch64InstrNEON.td - NEON support for AArch64 -----*- tablegen -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file describes the AArch64 NEON instruction set.
12 //===----------------------------------------------------------------------===//
14 //===----------------------------------------------------------------------===//
15 // NEON-specific DAG Nodes.
16 //===----------------------------------------------------------------------===//
// Bitwise select: (outs Result), (ins Mask, IfTrue, IfFalse).
// All four operands must be the same vector type.
def Neon_bsl : SDNode<"AArch64ISD::NEON_BSL", SDTypeProfile<1, 3,
[SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
SDTCisSameAs<0, 3>]>>;
// (outs Result), (ins Imm, OpCmode)
def SDT_Neon_movi : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
// Vector move immediate and move-inverted immediate; both take the raw
// 8-bit immediate plus the OpCmode field selecting the expansion.
def Neon_movi : SDNode<"AArch64ISD::NEON_MOVIMM", SDT_Neon_movi>;
def Neon_mvni : SDNode<"AArch64ISD::NEON_MVNIMM", SDT_Neon_movi>;
// (outs Result), (ins Imm)
def Neon_fmovi : SDNode<"AArch64ISD::NEON_FMOVIMM", SDTypeProfile<1, 1,
[SDTCisVec<0>, SDTCisVT<1, i32>]>>;
// (outs Result), (ins LHS, RHS, CondCode)
def Neon_cmp : SDNode<"AArch64ISD::NEON_CMP", SDTypeProfile<1, 3,
[SDTCisVec<0>, SDTCisSameAs<1, 2>]>>;
// (outs Result), (ins LHS, 0/0.0 constant, CondCode)
def Neon_cmpz : SDNode<"AArch64ISD::NEON_CMPZ", SDTypeProfile<1, 3,
[SDTCisVec<0>, SDTCisVec<1>]>>;
// (outs Result), (ins LHS, RHS)
def Neon_tst : SDNode<"AArch64ISD::NEON_TST", SDTypeProfile<1, 2,
[SDTCisVec<0>, SDTCisSameAs<1, 2>]>>;
// (outs Result), (ins Src, ShiftImm) -- vector shift with an i32 immediate.
// NOTE(review): restored the missing third constraint line and terminator.
def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                     SDTCisVT<2, i32>]>;
// Saturating shift-left by immediate (signed / unsigned variants).
def Neon_sqrshlImm : SDNode<"AArch64ISD::NEON_QSHLs", SDTARMVSH>;
def Neon_uqrshlImm : SDNode<"AArch64ISD::NEON_QSHLu", SDTARMVSH>;
// Duplicate a scalar into every lane of the result vector.
// NOTE(review): restored the missing constraint-list line and terminator.
def Neon_vdup : SDNode<"AArch64ISD::NEON_VDUP", SDTypeProfile<1, 1,
                       [SDTCisVec<0>]>>;
// (outs Result), (ins Vec, LaneIndex) -- duplicate one lane across the result.
def Neon_vduplane : SDNode<"AArch64ISD::NEON_VDUPLANE", SDTypeProfile<1, 2,
[SDTCisVec<0>, SDTCisVec<1>, SDTCisVT<2, i64>]>>;
54 //===----------------------------------------------------------------------===//
56 //===----------------------------------------------------------------------===//
// Three-operand same-type instructions over byte vectors only (.8b / .16b).
// NOTE(review): restored the missing `bit Commutable` parameter line, the
// `NoItinerary>;` terminators and the closing braces lost in truncation.
multiclass NeonI_3VSame_B_sizes<bit u, bits<2> size, bits<5> opcode,
                                string asmop, SDPatternOperator opnode8B,
                                SDPatternOperator opnode16B,
                                bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8B :  NeonI_3VSame<0b0, u, size, opcode,
               (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
               asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
               [(set (v8i8 VPR64:$Rd),
                  (v8i8 (opnode8B (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))],
               NoItinerary>;

    def _16B : NeonI_3VSame<0b1, u, size, opcode,
               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
               asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
               [(set (v16i8 VPR128:$Rd),
                  (v16i8 (opnode16B (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))],
               NoItinerary>;
  }
}
// Three-operand same-type instructions over halfword and word element sizes
// (.4h/.8h/.2s/.4s). NOTE(review): restored the missing `bit Commutable`
// parameter line, the `NoItinerary>;` terminators and the closing braces.
multiclass NeonI_3VSame_HS_sizes<bit u, bits<5> opcode,
                                 string asmop, SDPatternOperator opnode,
                                 bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _4H : NeonI_3VSame<0b0, u, 0b01, opcode,
              (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
              asmop # "\t$Rd.4h, $Rn.4h, $Rm.4h",
              [(set (v4i16 VPR64:$Rd),
                 (v4i16 (opnode (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))))],
              NoItinerary>;

    def _8H : NeonI_3VSame<0b1, u, 0b01, opcode,
              (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
              asmop # "\t$Rd.8h, $Rn.8h, $Rm.8h",
              [(set (v8i16 VPR128:$Rd),
                 (v8i16 (opnode (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))))],
              NoItinerary>;

    def _2S : NeonI_3VSame<0b0, u, 0b10, opcode,
              (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
              asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
              [(set (v2i32 VPR64:$Rd),
                 (v2i32 (opnode (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))))],
              NoItinerary>;

    def _4S : NeonI_3VSame<0b1, u, 0b10, opcode,
              (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
              asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
              [(set (v4i32 VPR128:$Rd),
                 (v4i32 (opnode (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))))],
              NoItinerary>;
  }
}
// Extends the HS multiclass with byte element sizes (.8b/.16b).
// NOTE(review): restored the missing `bit Commutable` parameter line, the
// `NoItinerary>;` terminators and the closing braces lost in truncation.
multiclass NeonI_3VSame_BHS_sizes<bit u, bits<5> opcode,
                                  string asmop, SDPatternOperator opnode,
                                  bit Commutable = 0>
  : NeonI_3VSame_HS_sizes<u, opcode, asmop, opnode, Commutable> {
  let isCommutable = Commutable in {
    def _8B : NeonI_3VSame<0b0, u, 0b00, opcode,
              (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
              asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
              [(set (v8i8 VPR64:$Rd),
                 (v8i8 (opnode (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))],
              NoItinerary>;

    def _16B : NeonI_3VSame<0b1, u, 0b00, opcode,
               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
               asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
               [(set (v16i8 VPR128:$Rd),
                  (v16i8 (opnode (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))],
               NoItinerary>;
  }
}
// Extends the BHS multiclass with the doubleword element size (.2d).
// NOTE(review): restored the missing `bit Commutable` parameter line, the
// `NoItinerary>;` terminator and the closing braces lost in truncation.
multiclass NeonI_3VSame_BHSD_sizes<bit u, bits<5> opcode,
                                   string asmop, SDPatternOperator opnode,
                                   bit Commutable = 0>
  : NeonI_3VSame_BHS_sizes<u, opcode, asmop, opnode, Commutable> {
  let isCommutable = Commutable in {
    def _2D : NeonI_3VSame<0b1, u, 0b11, opcode,
              (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
              asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
              [(set (v2i64 VPR128:$Rd),
                 (v2i64 (opnode (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))))],
              NoItinerary>;
  }
}
// Multiclass NeonI_3VSame_SD_sizes: Operand types are floating point types,
// but Result types can be integer or floating point types (used for FP
// arithmetic and for FP comparisons that produce integer masks).
// NOTE(review): restored the missing `NoItinerary>;` terminators and the
// closing braces lost in truncation.
multiclass NeonI_3VSame_SD_sizes<bit u, bit size, bits<5> opcode,
                                 string asmop, SDPatternOperator opnode2S,
                                 SDPatternOperator opnode4S,
                                 SDPatternOperator opnode2D,
                                 ValueType ResTy2S, ValueType ResTy4S,
                                 ValueType ResTy2D, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _2S : NeonI_3VSame<0b0, u, {size, 0b0}, opcode,
              (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
              asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
              [(set (ResTy2S VPR64:$Rd),
                 (ResTy2S (opnode2S (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))))],
              NoItinerary>;

    def _4S : NeonI_3VSame<0b1, u, {size, 0b0}, opcode,
              (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
              asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
              [(set (ResTy4S VPR128:$Rd),
                 (ResTy4S (opnode4S (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))))],
              NoItinerary>;

    def _2D : NeonI_3VSame<0b1, u, {size, 0b1}, opcode,
              (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
              asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
              [(set (ResTy2D VPR128:$Rd),
                 (ResTy2D (opnode2D (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))))],
              NoItinerary>;
  }
}
180 //===----------------------------------------------------------------------===//
181 // Instruction Definitions
182 //===----------------------------------------------------------------------===//
184 // Vector Arithmetic Instructions
// Vector Add (Integer and Floating-Point)
defm ADDvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b10000, "add", add, 1>;
defm FADDvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11010, "fadd", fadd, fadd, fadd,
v2f32, v4f32, v2f64, 1>;
// Vector Sub (Integer and Floating-Point); not commutable.
defm SUBvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b10000, "sub", sub, 0>;
defm FSUBvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11010, "fsub", fsub, fsub, fsub,
v2f32, v4f32, v2f64, 0>;
// Vector Multiply (Integer and Floating-Point); no .2d integer form.
defm MULvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10011, "mul", mul, 1>;
defm FMULvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11011, "fmul", fmul, fmul, fmul,
v2f32, v4f32, v2f64, 1>;
// Vector Multiply (Polynomial) -- byte elements only.
defm PMULvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b10011, "pmul",
int_arm_neon_vmulp, int_arm_neon_vmulp, 1>;
209 // Vector Multiply-accumulate and Multiply-subtract (Integer)
// class NeonI_3VSame_Constraint_impl: NeonI_3VSame with no data type and
// two operands constraints ($src is tied to $Rd so the destination also
// acts as an accumulator input).
// NOTE(review): restored the missing `NoItinerary> {` line and closing `}`.
class NeonI_3VSame_Constraint_impl<string asmop, string asmlane,
                                   RegisterOperand VPRC, ValueType OpTy,
                                   bit q, bit u, bits<2> size, bits<5> opcode,
                                   SDPatternOperator opnode>
  : NeonI_3VSame<q, u, size, opcode,
      (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, VPRC:$Rm),
      asmop # "\t$Rd" # asmlane # ", $Rn" # asmlane # ", $Rm" # asmlane,
      [(set (OpTy VPRC:$Rd),
         (OpTy (opnode (OpTy VPRC:$src), (OpTy VPRC:$Rn), (OpTy VPRC:$Rm))))],
      NoItinerary> {
  let Constraints = "$src = $Rd";
}
// Multiply-accumulate fragment: Ra + (Rn * Rm).
def Neon_mla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
(add node:$Ra, (mul node:$Rn, node:$Rm))>;
// Multiply-subtract fragment: Ra - (Rn * Rm).
def Neon_mls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
(sub node:$Ra, (mul node:$Rn, node:$Rm))>;
// Vector Multiply-Accumulate (MLA): Rd += Rn * Rm per element.
def MLAvvv_8B: NeonI_3VSame_Constraint_impl<"mla", ".8b", VPR64, v8i8,
0b0, 0b0, 0b00, 0b10010, Neon_mla>;
def MLAvvv_16B: NeonI_3VSame_Constraint_impl<"mla", ".16b", VPR128, v16i8,
0b1, 0b0, 0b00, 0b10010, Neon_mla>;
def MLAvvv_4H: NeonI_3VSame_Constraint_impl<"mla", ".4h", VPR64, v4i16,
0b0, 0b0, 0b01, 0b10010, Neon_mla>;
def MLAvvv_8H: NeonI_3VSame_Constraint_impl<"mla", ".8h", VPR128, v8i16,
0b1, 0b0, 0b01, 0b10010, Neon_mla>;
def MLAvvv_2S: NeonI_3VSame_Constraint_impl<"mla", ".2s", VPR64, v2i32,
0b0, 0b0, 0b10, 0b10010, Neon_mla>;
def MLAvvv_4S: NeonI_3VSame_Constraint_impl<"mla", ".4s", VPR128, v4i32,
0b1, 0b0, 0b10, 0b10010, Neon_mla>;
// Vector Multiply-Subtract (MLS): Rd -= Rn * Rm per element (u bit set).
def MLSvvv_8B: NeonI_3VSame_Constraint_impl<"mls", ".8b", VPR64, v8i8,
0b0, 0b1, 0b00, 0b10010, Neon_mls>;
def MLSvvv_16B: NeonI_3VSame_Constraint_impl<"mls", ".16b", VPR128, v16i8,
0b1, 0b1, 0b00, 0b10010, Neon_mls>;
def MLSvvv_4H: NeonI_3VSame_Constraint_impl<"mls", ".4h", VPR64, v4i16,
0b0, 0b1, 0b01, 0b10010, Neon_mls>;
def MLSvvv_8H: NeonI_3VSame_Constraint_impl<"mls", ".8h", VPR128, v8i16,
0b1, 0b1, 0b01, 0b10010, Neon_mls>;
def MLSvvv_2S: NeonI_3VSame_Constraint_impl<"mls", ".2s", VPR64, v2i32,
0b0, 0b1, 0b10, 0b10010, Neon_mls>;
def MLSvvv_4S: NeonI_3VSame_Constraint_impl<"mls", ".4s", VPR128, v4i32,
0b1, 0b1, 0b10, 0b10010, Neon_mls>;
258 // Vector Multiply-accumulate and Multiply-subtract (Floating Point)
// Unfused FP multiply-accumulate fragment: Ra + (Rn * Rm).
def Neon_fmla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
(fadd node:$Ra, (fmul node:$Rn, node:$Rm))>;
// Unfused FP multiply-subtract fragment: Ra - (Rn * Rm).
def Neon_fmls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
(fsub node:$Ra, (fmul node:$Rn, node:$Rm))>;
// Fused FP multiply-accumulate/subtract: only match the unfused fadd/fmul
// fragments when fusion is explicitly allowed.
// NOTE(review): restored the missing `}` closing this `let` block.
let Predicates = [HasNEON, UseFusedMAC] in {
def FMLAvvv_2S: NeonI_3VSame_Constraint_impl<"fmla", ".2s", VPR64, v2f32,
                                             0b0, 0b0, 0b00, 0b11001, Neon_fmla>;
def FMLAvvv_4S: NeonI_3VSame_Constraint_impl<"fmla", ".4s", VPR128, v4f32,
                                             0b1, 0b0, 0b00, 0b11001, Neon_fmla>;
def FMLAvvv_2D: NeonI_3VSame_Constraint_impl<"fmla", ".2d", VPR128, v2f64,
                                             0b1, 0b0, 0b01, 0b11001, Neon_fmla>;

def FMLSvvv_2S: NeonI_3VSame_Constraint_impl<"fmls", ".2s", VPR64, v2f32,
                                             0b0, 0b0, 0b10, 0b11001, Neon_fmls>;
def FMLSvvv_4S: NeonI_3VSame_Constraint_impl<"fmls", ".4s", VPR128, v4f32,
                                             0b1, 0b0, 0b10, 0b11001, Neon_fmls>;
def FMLSvvv_2D: NeonI_3VSame_Constraint_impl<"fmls", ".2d", VPR128, v2f64,
                                             0b1, 0b0, 0b11, 0b11001, Neon_fmls>;
}
// We're also allowed to match the fma instruction regardless of compile
// options.
// fma's addend is the last operand; FMLA takes the accumulator first,
// so the operands are reordered in the output pattern.
def : Pat<(v2f32 (fma VPR64:$Rn, VPR64:$Rm, VPR64:$Ra)),
(FMLAvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>;
def : Pat<(v4f32 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)),
(FMLAvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
def : Pat<(v2f64 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)),
(FMLAvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
// fma with a negated multiplicand maps to FMLS.
def : Pat<(v2f32 (fma (fneg VPR64:$Rn), VPR64:$Rm, VPR64:$Ra)),
(FMLSvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>;
def : Pat<(v4f32 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)),
(FMLSvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
def : Pat<(v2f64 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)),
(FMLSvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
// Vector Divide (Floating-Point)
defm FDIVvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11111, "fdiv", fdiv, fdiv, fdiv,
v2f32, v4f32, v2f64, 0>;
// Vector Bitwise Operations
// Vector Bitwise AND
defm ANDvvv : NeonI_3VSame_B_sizes<0b0, 0b00, 0b00011, "and", and, and, 1>;
// Vector Bitwise Exclusive OR
defm EORvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b00011, "eor", xor, xor, 1>;
// Vector Bitwise OR
defm ORRvvv : NeonI_3VSame_B_sizes<0b0, 0b10, 0b00011, "orr", or, or, 1>;
// ORR disassembled as MOV if Vn==Vm
// Vector Move - register
// Alias for ORR if Vn=Vm.
// FIXME: This is actually the preferred syntax but TableGen can't deal with
// custom printing of aliases.
def : NeonInstAlias<"mov $Rd.8b, $Rn.8b",
(ORRvvv_8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rn), 0>;
def : NeonInstAlias<"mov $Rd.16b, $Rn.16b",
(ORRvvv_16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rn), 0>;
// Matches a NEON_MOVIMM node whose decoded value is an all-ones byte splat.
// NOTE(review): restored the missing `unsigned EltBits;` declaration (used
// as an out-parameter by decodeNeonModImm) and the `}]>;` terminators.
def Neon_immAllOnes: PatLeaf<(Neon_movi (i32 timm), (i32 imm)), [{
  ConstantSDNode *ImmConstVal = cast<ConstantSDNode>(N->getOperand(0));
  ConstantSDNode *OpCmodeConstVal = cast<ConstantSDNode>(N->getOperand(1));
  unsigned EltBits;
  uint64_t EltVal = A64Imms::decodeNeonModImm(ImmConstVal->getZExtValue(),
    OpCmodeConstVal->getZExtValue(), EltBits);
  return (EltBits == 8 && EltVal == 0xff);
}]>;

// Matches a NEON_MOVIMM node whose decoded value is an all-zeros byte splat.
def Neon_immAllZeros: PatLeaf<(Neon_movi (i32 timm), (i32 imm)), [{
  ConstantSDNode *ImmConstVal = cast<ConstantSDNode>(N->getOperand(0));
  ConstantSDNode *OpCmodeConstVal = cast<ConstantSDNode>(N->getOperand(1));
  unsigned EltBits;
  uint64_t EltVal = A64Imms::decodeNeonModImm(ImmConstVal->getZExtValue(),
    OpCmodeConstVal->getZExtValue(), EltBits);
  return (EltBits == 8 && EltVal == 0x0);
}]>;
// Bitwise NOT expressed as XOR with an all-ones immediate splat.
def Neon_not8B : PatFrag<(ops node:$in),
(xor node:$in, (bitconvert (v8i8 Neon_immAllOnes)))>;
def Neon_not16B : PatFrag<(ops node:$in),
(xor node:$in, (bitconvert (v16i8 Neon_immAllOnes)))>;
// OR-NOT: Rn | ~Rm.
def Neon_orn8B : PatFrag<(ops node:$Rn, node:$Rm),
(or node:$Rn, (Neon_not8B node:$Rm))>;
def Neon_orn16B : PatFrag<(ops node:$Rn, node:$Rm),
(or node:$Rn, (Neon_not16B node:$Rm))>;
// Bit-clear: Rn & ~Rm.
def Neon_bic8B : PatFrag<(ops node:$Rn, node:$Rm),
(and node:$Rn, (Neon_not8B node:$Rm))>;
def Neon_bic16B : PatFrag<(ops node:$Rn, node:$Rm),
(and node:$Rn, (Neon_not16B node:$Rm))>;
// Vector Bitwise OR NOT - register
defm ORNvvv : NeonI_3VSame_B_sizes<0b0, 0b11, 0b00011, "orn",
Neon_orn8B, Neon_orn16B, 0>;
// Vector Bitwise Bit Clear (AND NOT) - register
defm BICvvv : NeonI_3VSame_B_sizes<0b0, 0b01, 0b00011, "bic",
Neon_bic8B, Neon_bic16B, 0>;
// Map the remaining vector types onto the byte-vector bitwise instructions
// (bitwise ops are type-agnostic, so one encoding serves all element sizes).
// NOTE(review): restored the missing `Instruction INST8B,` parameter line
// and the closing `}`.
multiclass Neon_bitwise2V_patterns<SDPatternOperator opnode8B,
                                   SDPatternOperator opnode16B,
                                   Instruction INST8B,
                                   Instruction INST16B> {
  def : Pat<(v2i32 (opnode8B VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v4i16 (opnode8B VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v1i64 (opnode8B VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v4i32 (opnode16B VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v8i16 (opnode16B VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v2i64 (opnode16B VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$Rn, VPR128:$Rm)>;
}
// Additional patterns for bitwise instructions AND, EOR, ORR, BIC, ORN
defm : Neon_bitwise2V_patterns<and, and, ANDvvv_8B, ANDvvv_16B>;
defm : Neon_bitwise2V_patterns<or, or, ORRvvv_8B, ORRvvv_16B>;
defm : Neon_bitwise2V_patterns<xor, xor, EORvvv_8B, EORvvv_16B>;
defm : Neon_bitwise2V_patterns<Neon_bic8B, Neon_bic16B, BICvvv_8B, BICvvv_16B>;
defm : Neon_bitwise2V_patterns<Neon_orn8B, Neon_orn16B, ORNvvv_8B, ORNvvv_16B>;
// Vector Bitwise Select -- $src (tied to $Rd) supplies the selection mask.
def BSLvvv_8B : NeonI_3VSame_Constraint_impl<"bsl", ".8b", VPR64, v8i8,
0b0, 0b1, 0b01, 0b00011, Neon_bsl>;
def BSLvvv_16B : NeonI_3VSame_Constraint_impl<"bsl", ".16b", VPR128, v16i8,
0b1, 0b1, 0b01, 0b00011, Neon_bsl>;
// Map all vector types onto the byte-vector BSL instructions, plus recognize
// the open-coded select ((Rn & Rd) | (Rm & ~Rd)) and the ARM vbsl intrinsic.
// NOTE(review): restored the missing `Instruction INST8B,` parameter line
// and the closing `}`.
multiclass Neon_bitwise3V_patterns<SDPatternOperator opnode,
                                   Instruction INST8B,
                                   Instruction INST16B> {
  // Disassociate type from instruction definition
  def : Pat<(v2i32 (opnode VPR64:$src, VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v4i16 (opnode VPR64:$src, VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v1i64 (opnode VPR64:$src, VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v4i32 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v8i16 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v2i64 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;

  // Allow to match BSL instruction pattern with non-constant operand
  def : Pat<(v8i8 (or (and VPR64:$Rn, VPR64:$Rd),
                    (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
          (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v4i16 (or (and VPR64:$Rn, VPR64:$Rd),
                     (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
          (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v2i32 (or (and VPR64:$Rn, VPR64:$Rd),
                     (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
          (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v1i64 (or (and VPR64:$Rn, VPR64:$Rd),
                     (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
          (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v16i8 (or (and VPR128:$Rn, VPR128:$Rd),
                     (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
          (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v8i16 (or (and VPR128:$Rn, VPR128:$Rd),
                     (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
          (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v4i32 (or (and VPR128:$Rn, VPR128:$Rd),
                     (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
          (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v2i64 (or (and VPR128:$Rn, VPR128:$Rd),
                     (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
          (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;

  // Allow to match llvm.arm.* intrinsics.
  def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 VPR64:$src),
                    (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 VPR64:$src),
                     (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 VPR64:$src),
                     (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 VPR64:$src),
                     (v1i64 VPR64:$Rn), (v1i64 VPR64:$Rm))),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 VPR64:$src),
                     (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 VPR128:$src),
                     (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 VPR128:$src),
                     (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 VPR128:$src),
                     (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 VPR128:$src),
                     (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 VPR128:$src),
                     (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v2f64 (int_arm_neon_vbsl (v2f64 VPR128:$src),
                     (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
}
// Additional patterns for bitwise instruction BSL
defm: Neon_bitwise3V_patterns<Neon_bsl, BSLvvv_8B, BSLvvv_16B>;
// A Neon_bsl fragment that never matches in ISel: BIT/BIF below reuse the
// BSL pattern shape but must only be produced by hand/assembly, not selection.
def Neon_NoBSLop : PatFrag<(ops node:$src, node:$Rn, node:$Rm),
(Neon_bsl node:$src, node:$Rn, node:$Rm),
[{ (void)N; return false; }]>;
// Vector Bitwise Insert if True
def BITvvv_8B : NeonI_3VSame_Constraint_impl<"bit", ".8b", VPR64, v8i8,
0b0, 0b1, 0b10, 0b00011, Neon_NoBSLop>;
def BITvvv_16B : NeonI_3VSame_Constraint_impl<"bit", ".16b", VPR128, v16i8,
0b1, 0b1, 0b10, 0b00011, Neon_NoBSLop>;
// Vector Bitwise Insert if False
def BIFvvv_8B : NeonI_3VSame_Constraint_impl<"bif", ".8b", VPR64, v8i8,
0b0, 0b1, 0b11, 0b00011, Neon_NoBSLop>;
def BIFvvv_16B : NeonI_3VSame_Constraint_impl<"bif", ".16b", VPR128, v16i8,
0b1, 0b1, 0b11, 0b00011, Neon_NoBSLop>;
// Vector Absolute Difference and Accumulate (Signed, Unsigned)
// Fragments: Ra + |Rn - Rm| using the ARM vabd intrinsics.
def Neon_uaba : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
(add node:$Ra, (int_arm_neon_vabdu node:$Rn, node:$Rm))>;
def Neon_saba : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
(add node:$Ra, (int_arm_neon_vabds node:$Rn, node:$Rm))>;
// Vector Absolute Difference and Accumulate (Unsigned)
def UABAvvv_8B : NeonI_3VSame_Constraint_impl<"uaba", ".8b", VPR64, v8i8,
0b0, 0b1, 0b00, 0b01111, Neon_uaba>;
def UABAvvv_16B : NeonI_3VSame_Constraint_impl<"uaba", ".16b", VPR128, v16i8,
0b1, 0b1, 0b00, 0b01111, Neon_uaba>;
def UABAvvv_4H : NeonI_3VSame_Constraint_impl<"uaba", ".4h", VPR64, v4i16,
0b0, 0b1, 0b01, 0b01111, Neon_uaba>;
def UABAvvv_8H : NeonI_3VSame_Constraint_impl<"uaba", ".8h", VPR128, v8i16,
0b1, 0b1, 0b01, 0b01111, Neon_uaba>;
def UABAvvv_2S : NeonI_3VSame_Constraint_impl<"uaba", ".2s", VPR64, v2i32,
0b0, 0b1, 0b10, 0b01111, Neon_uaba>;
def UABAvvv_4S : NeonI_3VSame_Constraint_impl<"uaba", ".4s", VPR128, v4i32,
0b1, 0b1, 0b10, 0b01111, Neon_uaba>;
// Vector Absolute Difference and Accumulate (Signed)
def SABAvvv_8B : NeonI_3VSame_Constraint_impl<"saba", ".8b", VPR64, v8i8,
0b0, 0b0, 0b00, 0b01111, Neon_saba>;
def SABAvvv_16B : NeonI_3VSame_Constraint_impl<"saba", ".16b", VPR128, v16i8,
0b1, 0b0, 0b00, 0b01111, Neon_saba>;
def SABAvvv_4H : NeonI_3VSame_Constraint_impl<"saba", ".4h", VPR64, v4i16,
0b0, 0b0, 0b01, 0b01111, Neon_saba>;
def SABAvvv_8H : NeonI_3VSame_Constraint_impl<"saba", ".8h", VPR128, v8i16,
0b1, 0b0, 0b01, 0b01111, Neon_saba>;
def SABAvvv_2S : NeonI_3VSame_Constraint_impl<"saba", ".2s", VPR64, v2i32,
0b0, 0b0, 0b10, 0b01111, Neon_saba>;
def SABAvvv_4S : NeonI_3VSame_Constraint_impl<"saba", ".4s", VPR128, v4i32,
0b1, 0b0, 0b10, 0b01111, Neon_saba>;
// Vector Absolute Difference (Signed, Unsigned)
defm UABDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01110, "uabd", int_arm_neon_vabdu, 0>;
defm SABDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01110, "sabd", int_arm_neon_vabds, 0>;
// Vector Absolute Difference (Floating Point)
// NOTE(review): reuses int_arm_neon_vabds for all three FP shapes -- confirm
// this intrinsic overload is intended for the f32/f64 vector cases.
defm FABDvvv: NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11010, "fabd",
int_arm_neon_vabds, int_arm_neon_vabds,
int_arm_neon_vabds, v2f32, v4f32, v2f64, 0>;
// Vector Reciprocal Step (Floating Point)
// NOTE(review): restored the missing third intrinsic operand line
// (the 2D variant) lost in truncation.
defm FRECPSvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11111, "frecps",
                                       int_arm_neon_vrecps, int_arm_neon_vrecps,
                                       int_arm_neon_vrecps,
                                       v2f32, v4f32, v2f64, 0>;
// Vector Reciprocal Square Root Step (Floating Point)
defm FRSQRTSvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11111, "frsqrts",
int_arm_neon_vrsqrts,
int_arm_neon_vrsqrts,
int_arm_neon_vrsqrts,
v2f32, v4f32, v2f64, 0>;
// Vector Comparisons
// Bind each NEON_CMP node to a specific condition code.
def Neon_cmeq : PatFrag<(ops node:$lhs, node:$rhs),
(Neon_cmp node:$lhs, node:$rhs, SETEQ)>;
def Neon_cmphs : PatFrag<(ops node:$lhs, node:$rhs),
(Neon_cmp node:$lhs, node:$rhs, SETUGE)>;
def Neon_cmge : PatFrag<(ops node:$lhs, node:$rhs),
(Neon_cmp node:$lhs, node:$rhs, SETGE)>;
def Neon_cmhi : PatFrag<(ops node:$lhs, node:$rhs),
(Neon_cmp node:$lhs, node:$rhs, SETUGT)>;
def Neon_cmgt : PatFrag<(ops node:$lhs, node:$rhs),
(Neon_cmp node:$lhs, node:$rhs, SETGT)>;
// NeonI_compare_aliases class: swaps register operands to implement
// comparison aliases, e.g., CMLE is alias for CMGE with operands reversed.
// NOTE(review): restored the missing asm-string continuation line
// (`", $Rm" # asmlane,`) lost in truncation.
class NeonI_compare_aliases<string asmop, string asmlane,
                            Instruction inst, RegisterOperand VPRC>
  : NeonInstAlias<asmop # "\t$Rd" # asmlane #", $Rn" # asmlane #
                    ", $Rm" # asmlane,
                  (inst VPRC:$Rd, VPRC:$Rm, VPRC:$Rn), 0b0>;
586 // Vector Comparisons (Integer)
588 // Vector Compare Mask Equal (Integer)
// Equality is symmetric, so mark CMEQ commutable at the MI level.
// NOTE(review): restored the missing `}` closing this `let` block.
let isCommutable = 1 in {
defm CMEQvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b10001, "cmeq", Neon_cmeq, 0>;
}
// Vector Compare Mask Higher or Same (Unsigned Integer)
defm CMHSvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00111, "cmhs", Neon_cmphs, 0>;
// Vector Compare Mask Greater Than or Equal (Integer)
defm CMGEvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00111, "cmge", Neon_cmge, 0>;
// Vector Compare Mask Higher (Unsigned Integer)
defm CMHIvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00110, "cmhi", Neon_cmhi, 0>;
// Vector Compare Mask Greater Than (Integer)
defm CMGTvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00110, "cmgt", Neon_cmgt, 0>;
// Vector Compare Mask Bitwise Test (Integer)
defm CMTSTvvv: NeonI_3VSame_BHSD_sizes<0b0, 0b10001, "cmtst", Neon_tst, 0>;
// Assembly-only aliases: each emits the base comparison with Rn/Rm swapped.
// Vector Compare Mask Less or Same (Unsigned Integer)
// CMLS is alias for CMHS with operands reversed.
def CMLSvvv_8B : NeonI_compare_aliases<"cmls", ".8b", CMHSvvv_8B, VPR64>;
def CMLSvvv_16B : NeonI_compare_aliases<"cmls", ".16b", CMHSvvv_16B, VPR128>;
def CMLSvvv_4H : NeonI_compare_aliases<"cmls", ".4h", CMHSvvv_4H, VPR64>;
def CMLSvvv_8H : NeonI_compare_aliases<"cmls", ".8h", CMHSvvv_8H, VPR128>;
def CMLSvvv_2S : NeonI_compare_aliases<"cmls", ".2s", CMHSvvv_2S, VPR64>;
def CMLSvvv_4S : NeonI_compare_aliases<"cmls", ".4s", CMHSvvv_4S, VPR128>;
def CMLSvvv_2D : NeonI_compare_aliases<"cmls", ".2d", CMHSvvv_2D, VPR128>;
// Vector Compare Mask Less Than or Equal (Integer)
// CMLE is alias for CMGE with operands reversed.
def CMLEvvv_8B : NeonI_compare_aliases<"cmle", ".8b", CMGEvvv_8B, VPR64>;
def CMLEvvv_16B : NeonI_compare_aliases<"cmle", ".16b", CMGEvvv_16B, VPR128>;
def CMLEvvv_4H : NeonI_compare_aliases<"cmle", ".4h", CMGEvvv_4H, VPR64>;
def CMLEvvv_8H : NeonI_compare_aliases<"cmle", ".8h", CMGEvvv_8H, VPR128>;
def CMLEvvv_2S : NeonI_compare_aliases<"cmle", ".2s", CMGEvvv_2S, VPR64>;
def CMLEvvv_4S : NeonI_compare_aliases<"cmle", ".4s", CMGEvvv_4S, VPR128>;
def CMLEvvv_2D : NeonI_compare_aliases<"cmle", ".2d", CMGEvvv_2D, VPR128>;
// Vector Compare Mask Lower (Unsigned Integer)
// CMLO is alias for CMHI with operands reversed.
def CMLOvvv_8B : NeonI_compare_aliases<"cmlo", ".8b", CMHIvvv_8B, VPR64>;
def CMLOvvv_16B : NeonI_compare_aliases<"cmlo", ".16b", CMHIvvv_16B, VPR128>;
def CMLOvvv_4H : NeonI_compare_aliases<"cmlo", ".4h", CMHIvvv_4H, VPR64>;
def CMLOvvv_8H : NeonI_compare_aliases<"cmlo", ".8h", CMHIvvv_8H, VPR128>;
def CMLOvvv_2S : NeonI_compare_aliases<"cmlo", ".2s", CMHIvvv_2S, VPR64>;
def CMLOvvv_4S : NeonI_compare_aliases<"cmlo", ".4s", CMHIvvv_4S, VPR128>;
def CMLOvvv_2D : NeonI_compare_aliases<"cmlo", ".2d", CMHIvvv_2D, VPR128>;
// Vector Compare Mask Less Than (Integer)
// CMLT is alias for CMGT with operands reversed.
def CMLTvvv_8B : NeonI_compare_aliases<"cmlt", ".8b", CMGTvvv_8B, VPR64>;
def CMLTvvv_16B : NeonI_compare_aliases<"cmlt", ".16b", CMGTvvv_16B, VPR128>;
def CMLTvvv_4H : NeonI_compare_aliases<"cmlt", ".4h", CMGTvvv_4H, VPR64>;
def CMLTvvv_8H : NeonI_compare_aliases<"cmlt", ".8h", CMGTvvv_8H, VPR128>;
def CMLTvvv_2S : NeonI_compare_aliases<"cmlt", ".2s", CMGTvvv_2S, VPR64>;
def CMLTvvv_4S : NeonI_compare_aliases<"cmlt", ".4s", CMGTvvv_4S, VPR128>;
def CMLTvvv_2D : NeonI_compare_aliases<"cmlt", ".2d", CMGTvvv_2D, VPR128>;
// Asm operand class for the literal #0 immediate used by compare-with-zero.
// NOTE(review): restored the missing braces and `let Name = "UImm0";` line
// lost in truncation.
def neon_uimm0_asmoperand : AsmOperandClass
{
  let Name = "UImm0";
  let PredicateMethod = "isUImm<0>";
  let RenderMethod = "addImmOperands";
}

// Operand that only accepts the immediate value 0.
def neon_uimm0 : Operand<i32>, ImmLeaf<i32, [{return Imm == 0;}]> {
  let ParserMatchClass = neon_uimm0_asmoperand;
  let PrintMethod = "printNeonUImm0Operand";
}
// Integer compare against the #0 immediate, one def per arrangement.
// NOTE(review): restored the missing opening `{`, the `NoItinerary>;`
// terminators and the closing `}` lost in truncation.
multiclass NeonI_cmpz_sizes<bit u, bits<5> opcode, string asmop, CondCode CC>
{
  def _8B : NeonI_2VMisc<0b0, u, 0b00, opcode,
            (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
            asmop # "\t$Rd.8b, $Rn.8b, $Imm",
            [(set (v8i8 VPR64:$Rd),
               (v8i8 (Neon_cmpz (v8i8 VPR64:$Rn), (i32 imm:$Imm), CC)))],
            NoItinerary>;

  def _16B : NeonI_2VMisc<0b1, u, 0b00, opcode,
             (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
             asmop # "\t$Rd.16b, $Rn.16b, $Imm",
             [(set (v16i8 VPR128:$Rd),
                (v16i8 (Neon_cmpz (v16i8 VPR128:$Rn), (i32 imm:$Imm), CC)))],
             NoItinerary>;

  def _4H : NeonI_2VMisc<0b0, u, 0b01, opcode,
            (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
            asmop # "\t$Rd.4h, $Rn.4h, $Imm",
            [(set (v4i16 VPR64:$Rd),
               (v4i16 (Neon_cmpz (v4i16 VPR64:$Rn), (i32 imm:$Imm), CC)))],
            NoItinerary>;

  def _8H : NeonI_2VMisc<0b1, u, 0b01, opcode,
            (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
            asmop # "\t$Rd.8h, $Rn.8h, $Imm",
            [(set (v8i16 VPR128:$Rd),
               (v8i16 (Neon_cmpz (v8i16 VPR128:$Rn), (i32 imm:$Imm), CC)))],
            NoItinerary>;

  def _2S : NeonI_2VMisc<0b0, u, 0b10, opcode,
            (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
            asmop # "\t$Rd.2s, $Rn.2s, $Imm",
            [(set (v2i32 VPR64:$Rd),
               (v2i32 (Neon_cmpz (v2i32 VPR64:$Rn), (i32 imm:$Imm), CC)))],
            NoItinerary>;

  def _4S : NeonI_2VMisc<0b1, u, 0b10, opcode,
            (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
            asmop # "\t$Rd.4s, $Rn.4s, $Imm",
            [(set (v4i32 VPR128:$Rd),
               (v4i32 (Neon_cmpz (v4i32 VPR128:$Rn), (i32 imm:$Imm), CC)))],
            NoItinerary>;

  def _2D : NeonI_2VMisc<0b1, u, 0b11, opcode,
            (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
            asmop # "\t$Rd.2d, $Rn.2d, $Imm",
            [(set (v2i64 VPR128:$Rd),
               (v2i64 (Neon_cmpz (v2i64 VPR128:$Rn), (i32 imm:$Imm), CC)))],
            NoItinerary>;
}
// Vector Compare Mask Equal to Zero (Integer)
defm CMEQvvi : NeonI_cmpz_sizes<0b0, 0b01001, "cmeq", SETEQ>;
// Vector Compare Mask Greater Than or Equal to Zero (Signed Integer)
defm CMGEvvi : NeonI_cmpz_sizes<0b1, 0b01000, "cmge", SETGE>;
// Vector Compare Mask Greater Than Zero (Signed Integer)
defm CMGTvvi : NeonI_cmpz_sizes<0b0, 0b01000, "cmgt", SETGT>;
// Vector Compare Mask Less Than or Equal To Zero (Signed Integer)
defm CMLEvvi : NeonI_cmpz_sizes<0b1, 0b01001, "cmle", SETLE>;
// Vector Compare Mask Less Than Zero (Signed Integer)
defm CMLTvvi : NeonI_cmpz_sizes<0b0, 0b01010, "cmlt", SETLT>;
729 // Vector Comparisons (Floating Point)
731 // Vector Compare Mask Equal (Floating Point)
// FP equality is symmetric, so mark FCMEQ commutable; results are integer
// masks (v2i32/v4i32/v2i64) even though the operands are FP vectors.
// NOTE(review): restored the missing `}` closing this `let` block.
let isCommutable = 1 in {
defm FCMEQvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11100, "fcmeq", Neon_cmeq,
                                      Neon_cmeq, Neon_cmeq,
                                      v2i32, v4i32, v2i64, 0>;
}
// Vector Compare Mask Greater Than Or Equal (Floating Point)
defm FCMGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11100, "fcmge", Neon_cmge,
Neon_cmge, Neon_cmge,
v2i32, v4i32, v2i64, 0>;
// Vector Compare Mask Greater Than (Floating Point)
defm FCMGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11100, "fcmgt", Neon_cmgt,
Neon_cmgt, Neon_cmgt,
v2i32, v4i32, v2i64, 0>;
// Vector Compare Mask Less Than Or Equal (Floating Point)
// FCMLE is alias for FCMGE with operands reversed.
def FCMLEvvv_2S : NeonI_compare_aliases<"fcmle", ".2s", FCMGEvvv_2S, VPR64>;
def FCMLEvvv_4S : NeonI_compare_aliases<"fcmle", ".4s", FCMGEvvv_4S, VPR128>;
def FCMLEvvv_2D : NeonI_compare_aliases<"fcmle", ".2d", FCMGEvvv_2D, VPR128>;
// Vector Compare Mask Less Than (Floating Point)
// FCMLT is alias for FCMGT with operands reversed.
def FCMLTvvv_2S : NeonI_compare_aliases<"fcmlt", ".2s", FCMGTvvv_2S, VPR64>;
def FCMLTvvv_4S : NeonI_compare_aliases<"fcmlt", ".4s", FCMGTvvv_4S, VPR128>;
def FCMLTvvv_2D : NeonI_compare_aliases<"fcmlt", ".2d", FCMGTvvv_2D, VPR128>;
// FP compare against the #0.0 immediate; results are integer masks.
// NOTE(review): restored the missing opening `{`, the `NoItinerary>;`
// terminators and the closing `}` lost in truncation.
multiclass NeonI_fpcmpz_sizes<bit u, bit size, bits<5> opcode,
                              string asmop, CondCode CC>
{
  def _2S : NeonI_2VMisc<0b0, u, {size, 0b0}, opcode,
            (outs VPR64:$Rd), (ins VPR64:$Rn, fpz32:$FPImm),
            asmop # "\t$Rd.2s, $Rn.2s, $FPImm",
            [(set (v2i32 VPR64:$Rd),
               (v2i32 (Neon_cmpz (v2f32 VPR64:$Rn), (f32 fpimm:$FPImm), CC)))],
            NoItinerary>;

  def _4S : NeonI_2VMisc<0b1, u, {size, 0b0}, opcode,
            (outs VPR128:$Rd), (ins VPR128:$Rn, fpz32:$FPImm),
            asmop # "\t$Rd.4s, $Rn.4s, $FPImm",
            [(set (v4i32 VPR128:$Rd),
               (v4i32 (Neon_cmpz (v4f32 VPR128:$Rn), (f32 fpimm:$FPImm), CC)))],
            NoItinerary>;

  def _2D : NeonI_2VMisc<0b1, u, {size, 0b1}, opcode,
            (outs VPR128:$Rd), (ins VPR128:$Rn, fpz32:$FPImm),
            asmop # "\t$Rd.2d, $Rn.2d, $FPImm",
            [(set (v2i64 VPR128:$Rd),
               (v2i64 (Neon_cmpz (v2f64 VPR128:$Rn), (f32 fpimm:$FPImm), CC)))],
            NoItinerary>;
}
786 // Vector Compare Mask Equal to Zero (Floating Point)
// The five FP compare-against-zero forms all instantiate NeonI_fpcmpz_sizes
// (2S/4S/2D variants comparing against #0.0 via Neon_cmpz); they are
// distinguished only by the <u, size, opcode> template bits and CondCode.
787 defm FCMEQvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01101, "fcmeq", SETEQ>;
789 // Vector Compare Mask Greater Than or Equal to Zero (Floating Point)
790 defm FCMGEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01100, "fcmge", SETGE>;
792 // Vector Compare Mask Greater Than Zero (Floating Point)
793 defm FCMGTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01100, "fcmgt", SETGT>;
795 // Vector Compare Mask Less Than or Equal To Zero (Floating Point)
796 defm FCMLEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01101, "fcmle", SETLE>;
798 // Vector Compare Mask Less Than Zero (Floating Point)
799 defm FCMLTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01110, "fcmlt", SETLT>;
801 // Vector Absolute Comparisons (Floating Point)
803 // Vector Absolute Compare Mask Greater Than Or Equal (Floating Point)
// Absolute compares map the 64-bit/128-bit f32 forms onto the shared ARM
// NEON intrinsics and the 2D form onto the AArch64-specific v2f64 intrinsic;
// results are integer masks (v2i32/v4i32/v2i64).
804 defm FACGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11101, "facge",
805 int_arm_neon_vacged, int_arm_neon_vacgeq,
806 int_aarch64_neon_vacgeq,
807 v2i32, v4i32, v2i64, 0>;
809 // Vector Absolute Compare Mask Greater Than (Floating Point)
810 defm FACGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11101, "facgt",
811 int_arm_neon_vacgtd, int_arm_neon_vacgtq,
812 int_aarch64_neon_vacgtq,
813 v2i32, v4i32, v2i64, 0>;
815 // Vector Absolute Compare Mask Less Than Or Equal (Floating Point)
816 // FACLE is alias for FACGE with operands reversed.
817 def FACLEvvv_2S : NeonI_compare_aliases<"facle", ".2s", FACGEvvv_2S, VPR64>;
818 def FACLEvvv_4S : NeonI_compare_aliases<"facle", ".4s", FACGEvvv_4S, VPR128>;
819 def FACLEvvv_2D : NeonI_compare_aliases<"facle", ".2d", FACGEvvv_2D, VPR128>;
821 // Vector Absolute Compare Mask Less Than (Floating Point)
822 // FACLT is alias for FACGT with operands reversed.
823 def FACLTvvv_2S : NeonI_compare_aliases<"faclt", ".2s", FACGTvvv_2S, VPR64>;
824 def FACLTvvv_4S : NeonI_compare_aliases<"faclt", ".4s", FACGTvvv_4S, VPR128>;
825 def FACLTvvv_2D : NeonI_compare_aliases<"faclt", ".2d", FACGTvvv_2D, VPR128>;
827 // Vector halving add (Integer Signed, Unsigned)
// First template bit is the U (unsigned) encoding bit; the trailing 1/0 is
// presumably a Commutable flag — confirm against NeonI_3VSame_BHS_sizes.
828 defm SHADDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00000, "shadd",
829 int_arm_neon_vhadds, 1>;
830 defm UHADDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00000, "uhadd",
831 int_arm_neon_vhaddu, 1>;
833 // Vector halving sub (Integer Signed, Unsigned)
834 defm SHSUBvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00100, "shsub",
835 int_arm_neon_vhsubs, 0>;
836 defm UHSUBvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00100, "uhsub",
837 int_arm_neon_vhsubu, 0>;
839 // Vector rounding halving add (Integer Signed, Unsigned)
840 defm SRHADDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00010, "srhadd",
841 int_arm_neon_vrhadds, 1>;
842 defm URHADDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00010, "urhadd",
843 int_arm_neon_vrhaddu, 1>;
845 // Vector Saturating add (Integer Signed, Unsigned)
846 defm SQADDvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00001, "sqadd",
847 int_arm_neon_vqadds, 1>;
848 defm UQADDvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00001, "uqadd",
849 int_arm_neon_vqaddu, 1>;
851 // Vector Saturating sub (Integer Signed, Unsigned)
// NOTE(review): sqsub/uqsub pass the trailing flag as 1 while shsub/uhsub
// pass 0; if that flag marks commutativity, 1 looks wrong for a subtraction
// — confirm against the multiclass definition.
852 defm SQSUBvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00101, "sqsub",
853 int_arm_neon_vqsubs, 1>;
854 defm UQSUBvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00101, "uqsub",
855 int_arm_neon_vqsubu, 1>;
857 // Vector Shift Left (Signed and Unsigned Integer)
// Register-operand shifts: the per-element shift amount comes from the
// second vector operand (ARM vshift* / vqshift* / vrshift* intrinsics).
// NOTE(review): all pass the trailing flag as 1; if that flag marks
// commutativity it looks wrong for shifts — confirm against the multiclass.
858 defm SSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01000, "sshl",
859 int_arm_neon_vshifts, 1>;
860 defm USHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01000, "ushl",
861 int_arm_neon_vshiftu, 1>;
863 // Vector Saturating Shift Left (Signed and Unsigned Integer)
864 defm SQSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01001, "sqshl",
865 int_arm_neon_vqshifts, 1>;
866 defm UQSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01001, "uqshl",
867 int_arm_neon_vqshiftu, 1>;
869 // Vector Rounding Shift Left (Signed and Unsigned Integer)
870 defm SRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01010, "srshl",
871 int_arm_neon_vrshifts, 1>;
872 defm URSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01010, "urshl",
873 int_arm_neon_vrshiftu, 1>;
875 // Vector Saturating Rounding Shift Left (Signed and Unsigned Integer)
876 defm SQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01011, "sqrshl",
877 int_arm_neon_vqrshifts, 1>;
878 defm UQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01011, "uqrshl",
879 int_arm_neon_vqrshiftu, 1>;
881 // Vector Maximum (Signed and Unsigned Integer)
// Element-wise integer max/min over 8b/16b/4h/8h/2s/4s (BHS sizes only;
// there is no 64-bit-element integer max/min).
882 defm SMAXvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01100, "smax", int_arm_neon_vmaxs, 1>;
883 defm UMAXvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01100, "umax", int_arm_neon_vmaxu, 1>;
885 // Vector Minimum (Signed and Unsigned Integer)
886 defm SMINvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01101, "smin", int_arm_neon_vmins, 1>;
887 defm UMINvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01101, "umin", int_arm_neon_vminu, 1>;
889 // Vector Maximum (Floating Point)
// The FP forms reuse the ARM vmaxs/vmins intrinsic for all three element
// arrangements; result types are the matching float vectors.
890 defm FMAXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11110, "fmax",
891 int_arm_neon_vmaxs, int_arm_neon_vmaxs,
892 int_arm_neon_vmaxs, v2f32, v4f32, v2f64, 1>;
894 // Vector Minimum (Floating Point)
895 defm FMINvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11110, "fmin",
896 int_arm_neon_vmins, int_arm_neon_vmins,
897 int_arm_neon_vmins, v2f32, v4f32, v2f64, 1>;
899 // Vector maxNum (Floating Point) - prefer a number over a quiet NaN
900 defm FMAXNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11000, "fmaxnm",
901 int_aarch64_neon_vmaxnm,
902 int_aarch64_neon_vmaxnm,
903 int_aarch64_neon_vmaxnm,
904 v2f32, v4f32, v2f64, 1>;
906 // Vector minNum (Floating Point) - prefer a number over a quiet NaN
907 defm FMINNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11000, "fminnm",
908 int_aarch64_neon_vminnm,
909 int_aarch64_neon_vminnm,
910 int_aarch64_neon_vminnm,
911 v2f32, v4f32, v2f64, 1>;
913 // Vector Maximum Pairwise (Signed and Unsigned Integer)
// Pairwise forms reduce adjacent element pairs from the concatenated
// operands (ARM vpmax/vpmin intrinsics).
914 defm SMAXPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10100, "smaxp", int_arm_neon_vpmaxs, 1>;
915 defm UMAXPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10100, "umaxp", int_arm_neon_vpmaxu, 1>;
917 // Vector Minimum Pairwise (Signed and Unsigned Integer)
918 defm SMINPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10101, "sminp", int_arm_neon_vpmins, 1>;
919 defm UMINPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10101, "uminp", int_arm_neon_vpminu, 1>;
921 // Vector Maximum Pairwise (Floating Point)
922 defm FMAXPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11110, "fmaxp",
923 int_arm_neon_vpmaxs, int_arm_neon_vpmaxs,
924 int_arm_neon_vpmaxs, v2f32, v4f32, v2f64, 1>;
926 // Vector Minimum Pairwise (Floating Point)
927 defm FMINPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11110, "fminp",
928 int_arm_neon_vpmins, int_arm_neon_vpmins,
929 int_arm_neon_vpmins, v2f32, v4f32, v2f64, 1>;
931 // Vector maxNum Pairwise (Floating Point) - prefer a number over a quiet NaN
932 defm FMAXNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11000, "fmaxnmp",
933 int_aarch64_neon_vpmaxnm,
934 int_aarch64_neon_vpmaxnm,
935 int_aarch64_neon_vpmaxnm,
936 v2f32, v4f32, v2f64, 1>;
938 // Vector minNum Pairwise (Floating Point) - prefer a number over a quiet NaN
939 defm FMINNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11000, "fminnmp",
940 int_aarch64_neon_vpminnm,
941 int_aarch64_neon_vpminnm,
942 int_aarch64_neon_vpminnm,
943 v2f32, v4f32, v2f64, 1>;
945 // Vector Addition Pairwise (Integer)
946 defm ADDP : NeonI_3VSame_BHSD_sizes<0b0, 0b10111, "addp", int_arm_neon_vpadd, 1>;
948 // Vector Addition Pairwise (Floating Point)
// NOTE(review): the FADDP defm below appears to be missing its intrinsic
// argument lines in this copy — verify against the complete file.
949 defm FADDP : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11010, "faddp",
953 v2f32, v4f32, v2f64, 1>;
955 // Vector Saturating Doubling Multiply High
956 defm SQDMULHvvv : NeonI_3VSame_HS_sizes<0b0, 0b10110, "sqdmulh",
957 int_arm_neon_vqdmulh, 1>;
959 // Vector Saturating Rounding Doubling Multiply High
960 defm SQRDMULHvvv : NeonI_3VSame_HS_sizes<0b1, 0b10110, "sqrdmulh",
961 int_arm_neon_vqrdmulh, 1>;
963 // Vector Multiply Extended (Floating Point)
964 defm FMULXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11011, "fmulx",
965 int_aarch64_neon_vmulx,
966 int_aarch64_neon_vmulx,
967 int_aarch64_neon_vmulx,
968 v2f32, v4f32, v2f64, 1>;
970 // Vector Immediate Instructions
972 multiclass neon_mov_imm_shift_asmoperands<string PREFIX>
974 def _asmoperand : AsmOperandClass
976 let Name = "NeonMovImmShift" # PREFIX;
977 let RenderMethod = "addNeonMovImmShift" # PREFIX # "Operands";
978 let PredicateMethod = "isNeonMovImmShift" # PREFIX;
982 // Definition of vector immediates shift operands
984 // The selectable use-cases extract the shift operation
985 // information from the OpCmode fields encoded in the immediate.
986 def neon_mod_shift_imm_XFORM : SDNodeXForm<imm, [{
987 uint64_t OpCmode = N->getZExtValue();
989 unsigned ShiftOnesIn;
991 A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn);
992 if (!HasShift) return SDValue();
993 return CurDAG->getTargetConstant(ShiftImm, MVT::i32);
996 // Vector immediates shift operands which accept LSL and MSL
997 // shift operators with shift value in the range of 0, 8, 16, 24 (LSL),
998 // or 0, 8 (LSLH) or 8, 16 (MSL).
999 defm neon_mov_imm_LSL : neon_mov_imm_shift_asmoperands<"LSL">;
1000 defm neon_mov_imm_MSL : neon_mov_imm_shift_asmoperands<"MSL">;
1001 // LSLH restricts shift amount to 0, 8 out of 0, 8, 16, 24
1002 defm neon_mov_imm_LSLH : neon_mov_imm_shift_asmoperands<"LSLH">;
1004 multiclass neon_mov_imm_shift_operands<string PREFIX,
1005 string HALF, string ISHALF, code pred>
1007 def _operand : Operand<i32>, ImmLeaf<i32, pred, neon_mod_shift_imm_XFORM>
1010 "printNeonMovImmShiftOperand<A64SE::" # PREFIX # ", " # ISHALF # ">";
1012 "DecodeNeonMovImmShiftOperand<A64SE::" # PREFIX # ", " # ISHALF # ">";
1013 let ParserMatchClass =
1014 !cast<AsmOperandClass>("neon_mov_imm_" # PREFIX # HALF # "_asmoperand");
1018 defm neon_mov_imm_LSL : neon_mov_imm_shift_operands<"LSL", "", "false", [{
1020 unsigned ShiftOnesIn;
1022 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1023 return (HasShift && !ShiftOnesIn);
1026 defm neon_mov_imm_MSL : neon_mov_imm_shift_operands<"MSL", "", "false", [{
1028 unsigned ShiftOnesIn;
1030 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1031 return (HasShift && ShiftOnesIn);
1034 defm neon_mov_imm_LSLH : neon_mov_imm_shift_operands<"LSL", "H", "true", [{
1036 unsigned ShiftOnesIn;
1038 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1039 return (HasShift && !ShiftOnesIn);
1042 def neon_uimm1_asmoperand : AsmOperandClass
1045 let PredicateMethod = "isUImm<1>";
1046 let RenderMethod = "addImmOperands";
1049 def neon_uimm2_asmoperand : AsmOperandClass
1052 let PredicateMethod = "isUImm<2>";
1053 let RenderMethod = "addImmOperands";
1056 def neon_uimm8_asmoperand : AsmOperandClass
1059 let PredicateMethod = "isUImm<8>";
1060 let RenderMethod = "addImmOperands";
1063 def neon_uimm8 : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
1064 let ParserMatchClass = neon_uimm8_asmoperand;
1065 let PrintMethod = "printNeonUImm8Operand";
1068 def neon_uimm64_mask_asmoperand : AsmOperandClass
1070 let Name = "NeonUImm64Mask";
1071 let PredicateMethod = "isNeonUImm64Mask";
1072 let RenderMethod = "addNeonUImm64MaskOperands";
1075 // MCOperand for 64-bit bytemask with each byte having only the
1076 // value 0x00 and 0xff is encoded as an unsigned 8-bit value
1077 def neon_uimm64_mask : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
1078 let ParserMatchClass = neon_uimm64_mask_asmoperand;
1079 let PrintMethod = "printNeonUImm64MaskOperand";
1082 multiclass NeonI_mov_imm_lsl_sizes<string asmop, bit op,
1083 SDPatternOperator opnode>
1085 // shift zeros, per word
1086 def _2S : NeonI_1VModImm<0b0, op,
1088 (ins neon_uimm8:$Imm,
1089 neon_mov_imm_LSL_operand:$Simm),
1090 !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"),
1091 [(set (v2i32 VPR64:$Rd),
1092 (v2i32 (opnode (timm:$Imm),
1093 (neon_mov_imm_LSL_operand:$Simm))))],
1096 let cmode = {0b0, Simm{1}, Simm{0}, 0b0};
1099 def _4S : NeonI_1VModImm<0b1, op,
1101 (ins neon_uimm8:$Imm,
1102 neon_mov_imm_LSL_operand:$Simm),
1103 !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"),
1104 [(set (v4i32 VPR128:$Rd),
1105 (v4i32 (opnode (timm:$Imm),
1106 (neon_mov_imm_LSL_operand:$Simm))))],
1109 let cmode = {0b0, Simm{1}, Simm{0}, 0b0};
1112 // shift zeros, per halfword
1113 def _4H : NeonI_1VModImm<0b0, op,
1115 (ins neon_uimm8:$Imm,
1116 neon_mov_imm_LSLH_operand:$Simm),
1117 !strconcat(asmop, "\t$Rd.4h, $Imm$Simm"),
1118 [(set (v4i16 VPR64:$Rd),
1119 (v4i16 (opnode (timm:$Imm),
1120 (neon_mov_imm_LSLH_operand:$Simm))))],
1123 let cmode = {0b1, 0b0, Simm, 0b0};
1126 def _8H : NeonI_1VModImm<0b1, op,
1128 (ins neon_uimm8:$Imm,
1129 neon_mov_imm_LSLH_operand:$Simm),
1130 !strconcat(asmop, "\t$Rd.8h, $Imm$Simm"),
1131 [(set (v8i16 VPR128:$Rd),
1132 (v8i16 (opnode (timm:$Imm),
1133 (neon_mov_imm_LSLH_operand:$Simm))))],
1136 let cmode = {0b1, 0b0, Simm, 0b0};
1140 multiclass NeonI_mov_imm_with_constraint_lsl_sizes<string asmop, bit op,
1141 SDPatternOperator opnode,
1142 SDPatternOperator neonopnode>
1144 let Constraints = "$src = $Rd" in {
1145 // shift zeros, per word
1146 def _2S : NeonI_1VModImm<0b0, op,
1148 (ins VPR64:$src, neon_uimm8:$Imm,
1149 neon_mov_imm_LSL_operand:$Simm),
1150 !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"),
1151 [(set (v2i32 VPR64:$Rd),
1152 (v2i32 (opnode (v2i32 VPR64:$src),
1153 (v2i32 (bitconvert (v2i32 (neonopnode timm:$Imm,
1154 neon_mov_imm_LSL_operand:$Simm)))))))],
1157 let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
1160 def _4S : NeonI_1VModImm<0b1, op,
1162 (ins VPR128:$src, neon_uimm8:$Imm,
1163 neon_mov_imm_LSL_operand:$Simm),
1164 !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"),
1165 [(set (v4i32 VPR128:$Rd),
1166 (v4i32 (opnode (v4i32 VPR128:$src),
1167 (v4i32 (bitconvert (v4i32 (neonopnode timm:$Imm,
1168 neon_mov_imm_LSL_operand:$Simm)))))))],
1171 let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
1174 // shift zeros, per halfword
1175 def _4H : NeonI_1VModImm<0b0, op,
1177 (ins VPR64:$src, neon_uimm8:$Imm,
1178 neon_mov_imm_LSLH_operand:$Simm),
1179 !strconcat(asmop, "\t$Rd.4h, $Imm$Simm"),
1180 [(set (v4i16 VPR64:$Rd),
1181 (v4i16 (opnode (v4i16 VPR64:$src),
1182 (v4i16 (bitconvert (v4i16 (neonopnode timm:$Imm,
1183 neon_mov_imm_LSL_operand:$Simm)))))))],
1186 let cmode = {0b1, 0b0, Simm, 0b1};
1189 def _8H : NeonI_1VModImm<0b1, op,
1191 (ins VPR128:$src, neon_uimm8:$Imm,
1192 neon_mov_imm_LSLH_operand:$Simm),
1193 !strconcat(asmop, "\t$Rd.8h, $Imm$Simm"),
1194 [(set (v8i16 VPR128:$Rd),
1195 (v8i16 (opnode (v8i16 VPR128:$src),
1196 (v8i16 (bitconvert (v8i16 (neonopnode timm:$Imm,
1197 neon_mov_imm_LSL_operand:$Simm)))))))],
1200 let cmode = {0b1, 0b0, Simm, 0b1};
1205 multiclass NeonI_mov_imm_msl_sizes<string asmop, bit op,
1206 SDPatternOperator opnode>
1208 // shift ones, per word
1209 def _2S : NeonI_1VModImm<0b0, op,
1211 (ins neon_uimm8:$Imm,
1212 neon_mov_imm_MSL_operand:$Simm),
1213 !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"),
1214 [(set (v2i32 VPR64:$Rd),
1215 (v2i32 (opnode (timm:$Imm),
1216 (neon_mov_imm_MSL_operand:$Simm))))],
1219 let cmode = {0b1, 0b1, 0b0, Simm};
1222 def _4S : NeonI_1VModImm<0b1, op,
1224 (ins neon_uimm8:$Imm,
1225 neon_mov_imm_MSL_operand:$Simm),
1226 !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"),
1227 [(set (v4i32 VPR128:$Rd),
1228 (v4i32 (opnode (timm:$Imm),
1229 (neon_mov_imm_MSL_operand:$Simm))))],
1232 let cmode = {0b1, 0b1, 0b0, Simm};
1236 // Vector Move Immediate Shifted
1237 let isReMaterializable = 1 in {
1238 defm MOVIvi_lsl : NeonI_mov_imm_lsl_sizes<"movi", 0b0, Neon_movi>;
1241 // Vector Move Inverted Immediate Shifted
1242 let isReMaterializable = 1 in {
1243 defm MVNIvi_lsl : NeonI_mov_imm_lsl_sizes<"mvni", 0b1, Neon_mvni>;
1246 // Vector Bitwise Bit Clear (AND NOT) - immediate
1247 let isReMaterializable = 1 in {
1248 defm BICvi_lsl : NeonI_mov_imm_with_constraint_lsl_sizes<"bic", 0b1,
1252 // Vector Bitwise OR - immediate
1254 let isReMaterializable = 1 in {
1255 defm ORRvi_lsl : NeonI_mov_imm_with_constraint_lsl_sizes<"orr", 0b0,
1259 // Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immediate
1260 // LowerBUILD_VECTOR favors lowering MOVI over MVNI.
1261 // BIC immediate instructions selection requires additional patterns to
1262 // transform Neon_movi operands into BIC immediate operands
1264 def neon_mov_imm_LSLH_transform_XFORM : SDNodeXForm<imm, [{
1265 uint64_t OpCmode = N->getZExtValue();
1267 unsigned ShiftOnesIn;
1268 (void)A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn);
1269 // LSLH restricts shift amount to 0, 8 which are encoded as 0 and 1
1270 // Transform encoded shift amount 0 to 1 and 1 to 0.
1271 return CurDAG->getTargetConstant(!ShiftImm, MVT::i32);
1274 def neon_mov_imm_LSLH_transform_operand
1277 unsigned ShiftOnesIn;
1279 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1280 return (HasShift && !ShiftOnesIn); }],
1281 neon_mov_imm_LSLH_transform_XFORM>;
1283 // Transform (and A, (4h Neon_movi 0xff)) -> BIC 4h (A, 0x00, LSL 8)
1284 // Transform (and A, (4h Neon_movi 0xff LSL #8)) -> BIC 4h (A, 0x00)
1285 def : Pat<(v4i16 (and VPR64:$src,
1286 (v4i16 (Neon_movi 255, neon_mov_imm_LSLH_transform_operand:$Simm)))),
1287 (BICvi_lsl_4H VPR64:$src, 0,
1288 neon_mov_imm_LSLH_transform_operand:$Simm)>;
1290 // Transform (and A, (8h Neon_movi 0xff)) -> BIC 8h (A, 0x00, LSL 8)
1291 // Transform (and A, (8h Neon_movi 0xff LSL #8)) -> BIC 8h (A, 0x00)
1292 def : Pat<(v8i16 (and VPR128:$src,
1293 (v8i16 (Neon_movi 255, neon_mov_imm_LSLH_transform_operand:$Simm)))),
1294 (BICvi_lsl_8H VPR128:$src, 0,
1295 neon_mov_imm_LSLH_transform_operand:$Simm)>;
1298 multiclass Neon_bitwiseVi_patterns<SDPatternOperator opnode,
1299 SDPatternOperator neonopnode,
1301 Instruction INST8H> {
1302 def : Pat<(v8i8 (opnode VPR64:$src,
1303 (bitconvert(v4i16 (neonopnode timm:$Imm,
1304 neon_mov_imm_LSLH_operand:$Simm))))),
1305 (INST4H VPR64:$src, neon_uimm8:$Imm,
1306 neon_mov_imm_LSLH_operand:$Simm)>;
1307 def : Pat<(v1i64 (opnode VPR64:$src,
1308 (bitconvert(v4i16 (neonopnode timm:$Imm,
1309 neon_mov_imm_LSLH_operand:$Simm))))),
1310 (INST4H VPR64:$src, neon_uimm8:$Imm,
1311 neon_mov_imm_LSLH_operand:$Simm)>;
1313 def : Pat<(v16i8 (opnode VPR128:$src,
1314 (bitconvert(v8i16 (neonopnode timm:$Imm,
1315 neon_mov_imm_LSLH_operand:$Simm))))),
1316 (INST8H VPR128:$src, neon_uimm8:$Imm,
1317 neon_mov_imm_LSLH_operand:$Simm)>;
1318 def : Pat<(v4i32 (opnode VPR128:$src,
1319 (bitconvert(v8i16 (neonopnode timm:$Imm,
1320 neon_mov_imm_LSLH_operand:$Simm))))),
1321 (INST8H VPR128:$src, neon_uimm8:$Imm,
1322 neon_mov_imm_LSLH_operand:$Simm)>;
1323 def : Pat<(v2i64 (opnode VPR128:$src,
1324 (bitconvert(v8i16 (neonopnode timm:$Imm,
1325 neon_mov_imm_LSLH_operand:$Simm))))),
1326 (INST8H VPR128:$src, neon_uimm8:$Imm,
1327 neon_mov_imm_LSLH_operand:$Simm)>;
1330 // Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immediate
// BIC (vector, immediate) computes A & ~imm, so the DAG node to match is
// `and`: (and A, (Neon_mvni imm, shift)) selects BIC with the un-inverted
// immediate. The explicit BICvi_lsl Pats above likewise match `and`; `or`
// belongs to the ORR instantiation below and would select BIC for the
// wrong node.
1331 defm : Neon_bitwiseVi_patterns<and, Neon_mvni, BICvi_lsl_4H, BICvi_lsl_8H>;
1333 // Additional patterns for Vector Bitwise OR - immediate
1334 defm : Neon_bitwiseVi_patterns<or, Neon_movi, ORRvi_lsl_4H, ORRvi_lsl_8H>;
1337 // Vector Move Immediate Masked
1338 let isReMaterializable = 1 in {
1339 defm MOVIvi_msl : NeonI_mov_imm_msl_sizes<"movi", 0b0, Neon_movi>;
1342 // Vector Move Inverted Immediate Masked
1343 let isReMaterializable = 1 in {
1344 defm MVNIvi_msl : NeonI_mov_imm_msl_sizes<"mvni", 0b1, Neon_mvni>;
1347 class NeonI_mov_imm_lsl_aliases<string asmop, string asmlane,
1348 Instruction inst, RegisterOperand VPRC>
1349 : NeonInstAlias<!strconcat(asmop, "\t$Rd," # asmlane # ", $Imm"),
1350 (inst VPRC:$Rd, neon_uimm8:$Imm, 0), 0b0>;
1352 // Aliases for Vector Move Immediate Shifted
// These NeonInstAlias defs accept the shift-less assembly syntax
// "<op> Vd.<T>, #Imm" and encode it with a zero shift amount (the alias
// class pins the Simm operand to 0).
1353 def : NeonI_mov_imm_lsl_aliases<"movi", ".2s", MOVIvi_lsl_2S, VPR64>;
1354 def : NeonI_mov_imm_lsl_aliases<"movi", ".4s", MOVIvi_lsl_4S, VPR128>;
1355 def : NeonI_mov_imm_lsl_aliases<"movi", ".4h", MOVIvi_lsl_4H, VPR64>;
1356 def : NeonI_mov_imm_lsl_aliases<"movi", ".8h", MOVIvi_lsl_8H, VPR128>;
1358 // Aliases for Vector Move Inverted Immediate Shifted
1359 def : NeonI_mov_imm_lsl_aliases<"mvni", ".2s", MVNIvi_lsl_2S, VPR64>;
1360 def : NeonI_mov_imm_lsl_aliases<"mvni", ".4s", MVNIvi_lsl_4S, VPR128>;
1361 def : NeonI_mov_imm_lsl_aliases<"mvni", ".4h", MVNIvi_lsl_4H, VPR64>;
1362 def : NeonI_mov_imm_lsl_aliases<"mvni", ".8h", MVNIvi_lsl_8H, VPR128>;
1364 // Aliases for Vector Bitwise Bit Clear (AND NOT) - immediate
1365 def : NeonI_mov_imm_lsl_aliases<"bic", ".2s", BICvi_lsl_2S, VPR64>;
1366 def : NeonI_mov_imm_lsl_aliases<"bic", ".4s", BICvi_lsl_4S, VPR128>;
1367 def : NeonI_mov_imm_lsl_aliases<"bic", ".4h", BICvi_lsl_4H, VPR64>;
1368 def : NeonI_mov_imm_lsl_aliases<"bic", ".8h", BICvi_lsl_8H, VPR128>;
1370 // Aliases for Vector Bitwise OR - immediate
1371 def : NeonI_mov_imm_lsl_aliases<"orr", ".2s", ORRvi_lsl_2S, VPR64>;
1372 def : NeonI_mov_imm_lsl_aliases<"orr", ".4s", ORRvi_lsl_4S, VPR128>;
1373 def : NeonI_mov_imm_lsl_aliases<"orr", ".4h", ORRvi_lsl_4H, VPR64>;
1374 def : NeonI_mov_imm_lsl_aliases<"orr", ".8h", ORRvi_lsl_8H, VPR128>;
1376 // Vector Move Immediate - per byte
1377 let isReMaterializable = 1 in {
1378 def MOVIvi_8B : NeonI_1VModImm<0b0, 0b0,
1379 (outs VPR64:$Rd), (ins neon_uimm8:$Imm),
1380 "movi\t$Rd.8b, $Imm",
1381 [(set (v8i8 VPR64:$Rd),
1382 (v8i8 (Neon_movi (timm:$Imm), (i32 imm))))],
1387 def MOVIvi_16B : NeonI_1VModImm<0b1, 0b0,
1388 (outs VPR128:$Rd), (ins neon_uimm8:$Imm),
1389 "movi\t$Rd.16b, $Imm",
1390 [(set (v16i8 VPR128:$Rd),
1391 (v16i8 (Neon_movi (timm:$Imm), (i32 imm))))],
1397 // Vector Move Immediate - bytemask, per double word
1398 let isReMaterializable = 1 in {
1399 def MOVIvi_2D : NeonI_1VModImm<0b1, 0b1,
1400 (outs VPR128:$Rd), (ins neon_uimm64_mask:$Imm),
1401 "movi\t $Rd.2d, $Imm",
1402 [(set (v2i64 VPR128:$Rd),
1403 (v2i64 (Neon_movi (timm:$Imm), (i32 imm))))],
1409 // Vector Move Immediate - bytemask, one doubleword
1411 let isReMaterializable = 1 in {
1412 def MOVIdi : NeonI_1VModImm<0b0, 0b1,
1413 (outs FPR64:$Rd), (ins neon_uimm64_mask:$Imm),
1415 [(set (f64 FPR64:$Rd),
1417 (v1i64 (Neon_movi (timm:$Imm), (i32 imm))))))],
1423 // Vector Floating Point Move Immediate
1425 class NeonI_FMOV_impl<string asmlane, RegisterOperand VPRC, ValueType OpTy,
1426 Operand immOpType, bit q, bit op>
1427 : NeonI_1VModImm<q, op,
1428 (outs VPRC:$Rd), (ins immOpType:$Imm),
1429 "fmov\t$Rd" # asmlane # ", $Imm",
1430 [(set (OpTy VPRC:$Rd),
1431 (OpTy (Neon_fmovi (timm:$Imm))))],
1436 let isReMaterializable = 1 in {
1437 def FMOVvi_2S : NeonI_FMOV_impl<".2s", VPR64, v2f32, fmov32_operand, 0b0, 0b0>;
1438 def FMOVvi_4S : NeonI_FMOV_impl<".4s", VPR128, v4f32, fmov32_operand, 0b1, 0b0>;
1439 def FMOVvi_2D : NeonI_FMOV_impl<".2d", VPR128, v2f64, fmov64_operand, 0b1, 0b1>;
1442 // Vector Shift (Immediate)
1443 // Immediate in [0, 63]
1444 def imm0_63 : Operand<i32> {
1445 let ParserMatchClass = uimm6_asmoperand;
1448 // Shift Right/Left Immediate - The immh:immb field of these shifts are encoded
1452 // 8 immh:immb<6:3> = '0001xxx', <imm> is encoded in immh:immb<2:0>
1453 // 16 immh:immb<6:4> = '001xxxx', <imm> is encoded in immh:immb<3:0>
1454 // 32 immh:immb<6:5> = '01xxxxx', <imm> is encoded in immh:immb<4:0>
1455 // 64 immh:immb<6> = '1xxxxxx', <imm> is encoded in immh:immb<5:0>
1457 // The shift right immediate amount, in the range 1 to element bits, is computed
1458 // as Offset - UInt(immh:immb). The shift left immediate amount, in the range 0
1459 // to element bits - 1, is computed as UInt(immh:immb) - Offset.
1461 class shr_imm_asmoperands<string OFFSET> : AsmOperandClass {
1462 let Name = "ShrImm" # OFFSET;
1463 let RenderMethod = "addImmOperands";
1464 let DiagnosticType = "ShrImm" # OFFSET;
1467 class shr_imm<string OFFSET> : Operand<i32> {
1468 let EncoderMethod = "getShiftRightImm" # OFFSET;
1469 let DecoderMethod = "DecodeShiftRightImm" # OFFSET;
1470 let ParserMatchClass =
1471 !cast<AsmOperandClass>("shr_imm" # OFFSET # "_asmoperand");
// Per-element-size shift-right immediate operands; valid range is
// 1..element-bits, encoded per the Offset - UInt(immh:immb) scheme
// described above.
1474 def shr_imm8_asmoperand : shr_imm_asmoperands<"8">;
1475 def shr_imm16_asmoperand : shr_imm_asmoperands<"16">;
1476 def shr_imm32_asmoperand : shr_imm_asmoperands<"32">;
1477 def shr_imm64_asmoperand : shr_imm_asmoperands<"64">;
1479 def shr_imm8 : shr_imm<"8">;
1480 def shr_imm16 : shr_imm<"16">;
1481 def shr_imm32 : shr_imm<"32">;
1482 def shr_imm64 : shr_imm<"64">;
1484 class shl_imm_asmoperands<string OFFSET> : AsmOperandClass {
1485 let Name = "ShlImm" # OFFSET;
1486 let RenderMethod = "addImmOperands";
1487 let DiagnosticType = "ShlImm" # OFFSET;
1490 class shl_imm<string OFFSET> : Operand<i32> {
1491 let EncoderMethod = "getShiftLeftImm" # OFFSET;
1492 let DecoderMethod = "DecodeShiftLeftImm" # OFFSET;
1493 let ParserMatchClass =
1494 !cast<AsmOperandClass>("shl_imm" # OFFSET # "_asmoperand");
// Per-element-size shift-left immediate operands; valid range is
// 0..element-bits-1, encoded per the UInt(immh:immb) - Offset scheme
// described above.
1497 def shl_imm8_asmoperand : shl_imm_asmoperands<"8">;
1498 def shl_imm16_asmoperand : shl_imm_asmoperands<"16">;
1499 def shl_imm32_asmoperand : shl_imm_asmoperands<"32">;
1500 def shl_imm64_asmoperand : shl_imm_asmoperands<"64">;
1502 def shl_imm8 : shl_imm<"8">;
1503 def shl_imm16 : shl_imm<"16">;
1504 def shl_imm32 : shl_imm<"32">;
1505 def shl_imm64 : shl_imm<"64">;
1507 class N2VShift<bit q, bit u, bits<5> opcode, string asmop, string T,
1508 RegisterOperand VPRC, ValueType Ty, Operand ImmTy, SDNode OpNode>
1509 : NeonI_2VShiftImm<q, u, opcode,
1510 (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
1511 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1512 [(set (Ty VPRC:$Rd),
1513 (Ty (OpNode (Ty VPRC:$Rn),
1514 (Ty (Neon_vdup (i32 imm:$Imm))))))],
1517 multiclass NeonI_N2VShL<bit u, bits<5> opcode, string asmop> {
1518 // 64-bit vector types.
1519 def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3, shl> {
1520 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
1523 def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4, shl> {
1524 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
1527 def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5, shl> {
1528 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
1531 // 128-bit vector types.
1532 def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3, shl> {
1533 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
1536 def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4, shl> {
1537 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
1540 def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5, shl> {
1541 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
1544 def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63, shl> {
1545 let Inst{22} = 0b1; // immh:immb = 1xxxxxx
1549 multiclass NeonI_N2VShR<bit u, bits<5> opcode, string asmop, SDNode OpNode> {
1550 def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1552 let Inst{22-19} = 0b0001;
1555 def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1557 let Inst{22-20} = 0b001;
1560 def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1562 let Inst{22-21} = 0b01;
1565 def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1567 let Inst{22-19} = 0b0001;
1570 def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1572 let Inst{22-20} = 0b001;
1575 def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1577 let Inst{22-21} = 0b01;
1580 def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
// Immediate-shift instructions: SHL uses the `shl` pattern baked into
// NeonI_N2VShL; SSHR/USHR select on the generic sra/srl DAG nodes with a
// splatted (Neon_vdup) immediate shift amount.
1587 defm SHLvvi : NeonI_N2VShL<0b0, 0b01010, "shl">;
1590 defm SSHRvvi : NeonI_N2VShR<0b0, 0b00000, "sshr", sra>;
1591 defm USHRvvi : NeonI_N2VShR<0b1, 0b00000, "ushr", srl>;
1593 def Neon_High16B : PatFrag<(ops node:$in),
1594 (extract_subvector (v16i8 node:$in), (iPTR 8))>;
1595 def Neon_High8H : PatFrag<(ops node:$in),
1596 (extract_subvector (v8i16 node:$in), (iPTR 4))>;
1597 def Neon_High4S : PatFrag<(ops node:$in),
1598 (extract_subvector (v4i32 node:$in), (iPTR 2))>;
1600 def Neon_low8H : PatFrag<(ops node:$in),
1601 (v4i16 (extract_subvector (v8i16 node:$in),
1603 def Neon_low4S : PatFrag<(ops node:$in),
1604 (v2i32 (extract_subvector (v4i32 node:$in),
1606 def Neon_low4f : PatFrag<(ops node:$in),
1607 (v2f32 (extract_subvector (v4f32 node:$in),
1610 class N2VShiftLong<bit q, bit u, bits<5> opcode, string asmop, string DestT,
1611 string SrcT, ValueType DestTy, ValueType SrcTy,
1612 Operand ImmTy, SDPatternOperator ExtOp>
1613 : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
1614 (ins VPR64:$Rn, ImmTy:$Imm),
1615 asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
1616 [(set (DestTy VPR128:$Rd),
1618 (DestTy (ExtOp (SrcTy VPR64:$Rn))),
1619 (DestTy (Neon_vdup (i32 imm:$Imm))))))],
1622 class N2VShiftLongHigh<bit q, bit u, bits<5> opcode, string asmop, string DestT,
1623 string SrcT, ValueType DestTy, ValueType SrcTy,
1624 int StartIndex, Operand ImmTy,
1625 SDPatternOperator ExtOp, PatFrag getTop>
1626 : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
1627 (ins VPR128:$Rn, ImmTy:$Imm),
1628 asmop # "2\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
1629 [(set (DestTy VPR128:$Rd),
1632 (SrcTy (getTop VPR128:$Rn)))),
1633 (DestTy (Neon_vdup (i32 imm:$Imm))))))],
1636 multiclass NeonI_N2VShLL<string prefix, bit u, bits<5> opcode, string asmop,
1638 // 64-bit vector types.
1639 def _8B : N2VShiftLong<0b0, u, opcode, asmop, "8h", "8b", v8i16, v8i8,
1641 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
1644 def _4H : N2VShiftLong<0b0, u, opcode, asmop, "4s", "4h", v4i32, v4i16,
1646 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
1649 def _2S : N2VShiftLong<0b0, u, opcode, asmop, "2d", "2s", v2i64, v2i32,
1651 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
1654 // 128-bit vector types
1655 def _16B : N2VShiftLongHigh<0b1, u, opcode, asmop, "8h", "16b",
1656 v8i16, v8i8, 8, uimm3, ExtOp, Neon_High16B> {
1657 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
1660 def _8H : N2VShiftLongHigh<0b1, u, opcode, asmop, "4s", "8h",
1661 v4i32, v4i16, 4, uimm4, ExtOp, Neon_High8H> {
1662 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
1665 def _4S : N2VShiftLongHigh<0b1, u, opcode, asmop, "2d", "4s",
1666 v2i64, v2i32, 2, uimm5, ExtOp, Neon_High4S> {
1667 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
1670 // Use other patterns to match when the immediate is 0.
1671 def : Pat<(v8i16 (ExtOp (v8i8 VPR64:$Rn))),
1672 (!cast<Instruction>(prefix # "_8B") VPR64:$Rn, 0)>;
1674 def : Pat<(v4i32 (ExtOp (v4i16 VPR64:$Rn))),
1675 (!cast<Instruction>(prefix # "_4H") VPR64:$Rn, 0)>;
1677 def : Pat<(v2i64 (ExtOp (v2i32 VPR64:$Rn))),
1678 (!cast<Instruction>(prefix # "_2S") VPR64:$Rn, 0)>;
1680 def : Pat<(v8i16 (ExtOp (v8i8 (Neon_High16B VPR128:$Rn)))),
1681 (!cast<Instruction>(prefix # "_16B") VPR128:$Rn, 0)>;
1683 def : Pat<(v4i32 (ExtOp (v4i16 (Neon_High8H VPR128:$Rn)))),
1684 (!cast<Instruction>(prefix # "_8H") VPR128:$Rn, 0)>;
1686 def : Pat<(v2i64 (ExtOp (v2i32 (Neon_High4S VPR128:$Rn)))),
1687 (!cast<Instruction>(prefix # "_4S") VPR128:$Rn, 0)>;
// Shift-left-long: extend (sext/zext) each element then shift. The prefix
// string must match the defm name so the multiclass can !cast its own
// instructions for the zero-immediate extension patterns.
1691 defm SSHLLvvi : NeonI_N2VShLL<"SSHLLvvi", 0b0, 0b10100, "sshll", sext>;
1692 defm USHLLvvi : NeonI_N2VShLL<"USHLLvvi", 0b1, 0b10100, "ushll", zext>;
1694 // Rounding/Saturating shift
1695 class N2VShift_RQ<bit q, bit u, bits<5> opcode, string asmop, string T,
1696 RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1697 SDPatternOperator OpNode>
1698 : NeonI_2VShiftImm<q, u, opcode,
1699 (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
1700 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1701 [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$Rn),
1705 // shift right (vector by immediate)
1706 multiclass NeonI_N2VShR_RQ<bit u, bits<5> opcode, string asmop,
1707 SDPatternOperator OpNode> {
1708 def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1710 let Inst{22-19} = 0b0001;
1713 def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1715 let Inst{22-20} = 0b001;
1718 def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1720 let Inst{22-21} = 0b01;
1723 def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1725 let Inst{22-19} = 0b0001;
1728 def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1730 let Inst{22-20} = 0b001;
1733 def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1735 let Inst{22-21} = 0b01;
1738 def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1744 multiclass NeonI_N2VShL_Q<bit u, bits<5> opcode, string asmop,
1745 SDPatternOperator OpNode> {
1746 // 64-bit vector types.
1747 def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3,
1749 let Inst{22-19} = 0b0001;
1752 def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4,
1754 let Inst{22-20} = 0b001;
1757 def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5,
1759 let Inst{22-21} = 0b01;
1762 // 128-bit vector types.
1763 def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3,
1765 let Inst{22-19} = 0b0001;
1768 def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4,
1770 let Inst{22-20} = 0b001;
1773 def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5,
1775 let Inst{22-21} = 0b01;
1778 def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63,
1784 // Rounding shift right
// SRSHR/URSHR: rounding shift right by immediate.  The first (u) bit selects
// signedness for the shared opcode 0b00100: u=0 signed, u=1 unsigned.
1785 defm SRSHRvvi : NeonI_N2VShR_RQ<0b0, 0b00100, "srshr",
1786 int_aarch64_neon_vsrshr>;
1787 defm URSHRvvi : NeonI_N2VShR_RQ<0b1, 0b00100, "urshr",
1788 int_aarch64_neon_vurshr>;
1790 // Saturating shift left unsigned
// SQSHLU exists only in the u=1 form of opcode 0b01100 (there is no
// signed/unsigned pair for this mnemonic).
1791 defm SQSHLUvvi : NeonI_N2VShL_Q<0b1, 0b01100, "sqshlu", int_aarch64_neon_vsqshlu>;
1793 // Saturating shift left
// SQSHL/UQSHL by immediate select via the NEON_QSHLs/NEON_QSHLu DAG nodes
// (Neon_sqrshlImm / Neon_uqrshlImm, defined near the top of this file).
1794 defm SQSHLvvi : NeonI_N2VShL_Q<0b0, 0b01110, "sqshl", Neon_sqrshlImm>;
1795 defm UQSHLvvi : NeonI_N2VShL_Q<0b1, 0b01110, "uqshl", Neon_uqrshlImm>;
1797 class N2VShiftAdd<bit q, bit u, bits<5> opcode, string asmop, string T,
1798 RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1800 : NeonI_2VShiftImm<q, u, opcode,
1801 (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
1802 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1803 [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
1804 (Ty (OpNode (Ty VPRC:$Rn),
1805 (Ty (Neon_vdup (i32 imm:$Imm))))))))],
1807 let Constraints = "$src = $Rd";
1810 // Shift Right accumulate
1811 multiclass NeonI_N2VShRAdd<bit u, bits<5> opcode, string asmop, SDNode OpNode> {
1812 def _8B : N2VShiftAdd<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1814 let Inst{22-19} = 0b0001;
1817 def _4H : N2VShiftAdd<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1819 let Inst{22-20} = 0b001;
1822 def _2S : N2VShiftAdd<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1824 let Inst{22-21} = 0b01;
1827 def _16B : N2VShiftAdd<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1829 let Inst{22-19} = 0b0001;
1832 def _8H : N2VShiftAdd<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1834 let Inst{22-20} = 0b001;
1837 def _4S : N2VShiftAdd<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1839 let Inst{22-21} = 0b01;
1842 def _2D : N2VShiftAdd<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1848 // Shift right and accumulate
1849 defm SSRAvvi : NeonI_N2VShRAdd<0, 0b00010, "ssra", sra>;
1850 defm USRAvvi : NeonI_N2VShRAdd<1, 0b00010, "usra", srl>;
1852 // Rounding shift accumulate
1853 class N2VShiftAdd_R<bit q, bit u, bits<5> opcode, string asmop, string T,
1854 RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1855 SDPatternOperator OpNode>
1856 : NeonI_2VShiftImm<q, u, opcode,
1857 (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
1858 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1859 [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
1860 (Ty (OpNode (Ty VPRC:$Rn), (i32 imm:$Imm))))))],
1862 let Constraints = "$src = $Rd";
1865 multiclass NeonI_N2VShRAdd_R<bit u, bits<5> opcode, string asmop,
1866 SDPatternOperator OpNode> {
1867 def _8B : N2VShiftAdd_R<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1869 let Inst{22-19} = 0b0001;
1872 def _4H : N2VShiftAdd_R<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1874 let Inst{22-20} = 0b001;
1877 def _2S : N2VShiftAdd_R<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1879 let Inst{22-21} = 0b01;
1882 def _16B : N2VShiftAdd_R<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1884 let Inst{22-19} = 0b0001;
1887 def _8H : N2VShiftAdd_R<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1889 let Inst{22-20} = 0b001;
1892 def _4S : N2VShiftAdd_R<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1894 let Inst{22-21} = 0b01;
1897 def _2D : N2VShiftAdd_R<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1903 // Rounding shift right and accumulate
1904 defm SRSRAvvi : NeonI_N2VShRAdd_R<0, 0b00110, "srsra", int_aarch64_neon_vsrshr>;
1905 defm URSRAvvi : NeonI_N2VShRAdd_R<1, 0b00110, "ursra", int_aarch64_neon_vurshr>;
1907 // Shift insert by immediate
1908 class N2VShiftIns<bit q, bit u, bits<5> opcode, string asmop, string T,
1909 RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1910 SDPatternOperator OpNode>
1911 : NeonI_2VShiftImm<q, u, opcode,
1912 (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
1913 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1914 [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$src), (Ty VPRC:$Rn),
1917 let Constraints = "$src = $Rd";
1920 // shift left insert (vector by immediate)
1921 multiclass NeonI_N2VShLIns<bit u, bits<5> opcode, string asmop> {
1922 def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3,
1923 int_aarch64_neon_vsli> {
1924 let Inst{22-19} = 0b0001;
1927 def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4,
1928 int_aarch64_neon_vsli> {
1929 let Inst{22-20} = 0b001;
1932 def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5,
1933 int_aarch64_neon_vsli> {
1934 let Inst{22-21} = 0b01;
1937 // 128-bit vector types
1938 def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3,
1939 int_aarch64_neon_vsli> {
1940 let Inst{22-19} = 0b0001;
1943 def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4,
1944 int_aarch64_neon_vsli> {
1945 let Inst{22-20} = 0b001;
1948 def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5,
1949 int_aarch64_neon_vsli> {
1950 let Inst{22-21} = 0b01;
1953 def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63,
1954 int_aarch64_neon_vsli> {
1959 // shift right insert (vector by immediate)
1960 multiclass NeonI_N2VShRIns<bit u, bits<5> opcode, string asmop> {
1961 // 64-bit vector types.
1962 def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1963 int_aarch64_neon_vsri> {
1964 let Inst{22-19} = 0b0001;
1967 def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1968 int_aarch64_neon_vsri> {
1969 let Inst{22-20} = 0b001;
1972 def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1973 int_aarch64_neon_vsri> {
1974 let Inst{22-21} = 0b01;
1977 // 128-bit vector types
1978 def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1979 int_aarch64_neon_vsri> {
1980 let Inst{22-19} = 0b0001;
1983 def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1984 int_aarch64_neon_vsri> {
1985 let Inst{22-20} = 0b001;
1988 def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1989 int_aarch64_neon_vsri> {
1990 let Inst{22-21} = 0b01;
1993 def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1994 int_aarch64_neon_vsri> {
1999 // Shift left and insert
2000 defm SLIvvi : NeonI_N2VShLIns<0b1, 0b01010, "sli">;
2002 // Shift right and insert
2003 defm SRIvvi : NeonI_N2VShRIns<0b1, 0b01000, "sri">;
2005 class N2VShR_Narrow<bit q, bit u, bits<5> opcode, string asmop, string DestT,
2006 string SrcT, Operand ImmTy>
2007 : NeonI_2VShiftImm<q, u, opcode,
2008 (outs VPR64:$Rd), (ins VPR128:$Rn, ImmTy:$Imm),
2009 asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
2012 class N2VShR_Narrow_Hi<bit q, bit u, bits<5> opcode, string asmop, string DestT,
2013 string SrcT, Operand ImmTy>
2014 : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
2015 (ins VPR128:$src, VPR128:$Rn, ImmTy:$Imm),
2016 asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
2018 let Constraints = "$src = $Rd";
2021 // shift right narrow (vector by immediate)
2022 multiclass NeonI_N2VShR_Narrow<bit u, bits<5> opcode, string asmop> {
2023 def _8B : N2VShR_Narrow<0b0, u, opcode, asmop, "8b", "8h", shr_imm8> {
2024 let Inst{22-19} = 0b0001;
2027 def _4H : N2VShR_Narrow<0b0, u, opcode, asmop, "4h", "4s", shr_imm16> {
2028 let Inst{22-20} = 0b001;
2031 def _2S : N2VShR_Narrow<0b0, u, opcode, asmop, "2s", "2d", shr_imm32> {
2032 let Inst{22-21} = 0b01;
2035 // Shift Narrow High
2036 def _16B : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "16b", "8h",
2038 let Inst{22-19} = 0b0001;
2041 def _8H : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "8h", "4s",
2043 let Inst{22-20} = 0b001;
2046 def _4S : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "4s", "2d",
2048 let Inst{22-21} = 0b01;
2052 // Shift right narrow
2053 defm SHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10000, "shrn">;
2055 // Shift right narrow (prefix Q is saturating, prefix R is rounding)
// NOTE(review): QSHRUNvvi/QRSHRUNvvi drop the leading "S" that their asm
// mnemonics ("sqshrun"/"sqrshrun") and the sibling SQSHRNvvi records carry.
// Renaming would also require updating the Neon_shiftNarrow_QR_patterns
// instantiations further down that reference these record names.
2056 defm QSHRUNvvi :NeonI_N2VShR_Narrow<0b1, 0b10000, "sqshrun">;
2057 defm RSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10001, "rshrn">;
2058 defm QRSHRUNvvi : NeonI_N2VShR_Narrow<0b1, 0b10001, "sqrshrun">;
2059 defm SQSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10010, "sqshrn">;
2060 defm UQSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10010, "uqshrn">;
2061 defm SQRSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10011, "sqrshrn">;
2062 defm UQRSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10011, "uqrshrn">;
// "Combine" fragments: form a 128-bit vector by concatenating two 64-bit
// halves ($Rm becomes the low half, $Rn the high half).  One fragment per
// result element type; the lower-case 4f/2d variants are the FP versions.
2064 def Neon_combine_2D : PatFrag<(ops node:$Rm, node:$Rn),
2065 (v2i64 (concat_vectors (v1i64 node:$Rm),
2066 (v1i64 node:$Rn)))>;
2067 def Neon_combine_8H : PatFrag<(ops node:$Rm, node:$Rn),
2068 (v8i16 (concat_vectors (v4i16 node:$Rm),
2069 (v4i16 node:$Rn)))>;
2070 def Neon_combine_4S : PatFrag<(ops node:$Rm, node:$Rn),
2071 (v4i32 (concat_vectors (v2i32 node:$Rm),
2072 (v2i32 node:$Rn)))>;
2073 def Neon_combine_4f : PatFrag<(ops node:$Rm, node:$Rn),
2074 (v4f32 (concat_vectors (v2f32 node:$Rm),
2075 (v2f32 node:$Rn)))>;
2076 def Neon_combine_2d : PatFrag<(ops node:$Rm, node:$Rn),
2077 (v2f64 (concat_vectors (v1f64 node:$Rm),
2078 (v1f64 node:$Rn)))>;
// Uniform vector shift-by-immediate fragments: match an srl (lshr*) or sra
// (ashr*) whose shift amount is an i32 splatted across all lanes via
// Neon_vdup, i.e. every element is shifted by the same immediate $rhs.
// Used by the Neon_shiftNarrow_patterns shift-narrow matching below.
2080 def Neon_lshrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
2081 (v8i16 (srl (v8i16 node:$lhs),
2082 (v8i16 (Neon_vdup (i32 node:$rhs)))))>;
2083 def Neon_lshrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
2084 (v4i32 (srl (v4i32 node:$lhs),
2085 (v4i32 (Neon_vdup (i32 node:$rhs)))))>;
2086 def Neon_lshrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
2087 (v2i64 (srl (v2i64 node:$lhs),
2088 (v2i64 (Neon_vdup (i32 node:$rhs)))))>;
2089 def Neon_ashrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
2090 (v8i16 (sra (v8i16 node:$lhs),
2091 (v8i16 (Neon_vdup (i32 node:$rhs)))))>;
2092 def Neon_ashrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
2093 (v4i32 (sra (v4i32 node:$lhs),
2094 (v4i32 (Neon_vdup (i32 node:$rhs)))))>;
2095 def Neon_ashrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
2096 (v2i64 (sra (v2i64 node:$lhs),
2097 (v2i64 (Neon_vdup (i32 node:$rhs)))))>;
2099 // Normal shift right narrow is matched by IR (srl/sra, trunc, concat_vectors)
2100 multiclass Neon_shiftNarrow_patterns<string shr> {
2101 def : Pat<(v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H") VPR128:$Rn,
2103 (SHRNvvi_8B VPR128:$Rn, imm:$Imm)>;
2104 def : Pat<(v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S") VPR128:$Rn,
2106 (SHRNvvi_4H VPR128:$Rn, imm:$Imm)>;
2107 def : Pat<(v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D") VPR128:$Rn,
2109 (SHRNvvi_2S VPR128:$Rn, imm:$Imm)>;
2111 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
2112 (v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H")
2113 VPR128:$Rn, (i32 imm:$Imm))))))),
2114 (SHRNvvi_16B (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
2115 VPR128:$Rn, imm:$Imm)>;
2116 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
2117 (v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S")
2118 VPR128:$Rn, (i32 imm:$Imm))))))),
2119 (SHRNvvi_8H (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2120 VPR128:$Rn, imm:$Imm)>;
2121 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
2122 (v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D")
2123 VPR128:$Rn, (i32 imm:$Imm))))))),
2124 (SHRNvvi_4S (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2125 VPR128:$Rn, imm:$Imm)>;
2128 multiclass Neon_shiftNarrow_QR_patterns<SDPatternOperator op, string prefix> {
2129 def : Pat<(v8i8 (op (v8i16 VPR128:$Rn), imm:$Imm)),
2130 (!cast<Instruction>(prefix # "_8B") VPR128:$Rn, imm:$Imm)>;
2131 def : Pat<(v4i16 (op (v4i32 VPR128:$Rn), imm:$Imm)),
2132 (!cast<Instruction>(prefix # "_4H") VPR128:$Rn, imm:$Imm)>;
2133 def : Pat<(v2i32 (op (v2i64 VPR128:$Rn), imm:$Imm)),
2134 (!cast<Instruction>(prefix # "_2S") VPR128:$Rn, imm:$Imm)>;
2136 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
2137 (v1i64 (bitconvert (v8i8 (op (v8i16 VPR128:$Rn), imm:$Imm))))),
2138 (!cast<Instruction>(prefix # "_16B")
2139 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2140 VPR128:$Rn, imm:$Imm)>;
2141 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
2142 (v1i64 (bitconvert (v4i16 (op (v4i32 VPR128:$Rn), imm:$Imm))))),
2143 (!cast<Instruction>(prefix # "_8H")
2144 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2145 VPR128:$Rn, imm:$Imm)>;
2146 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
2147 (v1i64 (bitconvert (v2i32 (op (v2i64 VPR128:$Rn), imm:$Imm))))),
2148 (!cast<Instruction>(prefix # "_4S")
2149 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2150 VPR128:$Rn, imm:$Imm)>;
// SHRN matches both logical and arithmetic source shifts: the truncate
// discards the high bits, so srl and sra feed the same instruction.
2153 defm : Neon_shiftNarrow_patterns<"lshr">;
2154 defm : Neon_shiftNarrow_patterns<"ashr">;
// Map each saturating/rounding shift-narrow intrinsic onto the record
// prefix of the corresponding instruction family defined above.
2156 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrun, "QSHRUNvvi">;
2157 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vrshrn, "RSHRNvvi">;
2158 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrun, "QRSHRUNvvi">;
2159 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrn, "SQSHRNvvi">;
2160 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqshrn, "UQSHRNvvi">;
2161 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrn, "SQRSHRNvvi">;
2162 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqrshrn, "UQRSHRNvvi">;
2164 // Convert between fixed-point and floating-point
2165 class N2VCvt_Fx<bit q, bit u, bits<5> opcode, string asmop, string T,
2166 RegisterOperand VPRC, ValueType DestTy, ValueType SrcTy,
2167 Operand ImmTy, SDPatternOperator IntOp>
2168 : NeonI_2VShiftImm<q, u, opcode,
2169 (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
2170 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
2171 [(set (DestTy VPRC:$Rd), (DestTy (IntOp (SrcTy VPRC:$Rn),
2175 multiclass NeonI_N2VCvt_Fx2fp<bit u, bits<5> opcode, string asmop,
2176 SDPatternOperator IntOp> {
2177 def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2f32, v2i32,
2179 let Inst{22-21} = 0b01;
2182 def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4f32, v4i32,
2184 let Inst{22-21} = 0b01;
2187 def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2f64, v2i64,
2193 multiclass NeonI_N2VCvt_Fp2fx<bit u, bits<5> opcode, string asmop,
2194 SDPatternOperator IntOp> {
2195 def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2i32, v2f32,
2197 let Inst{22-21} = 0b01;
2200 def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4i32, v4f32,
2202 let Inst{22-21} = 0b01;
2205 def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2i64, v2f64,
2211 // Convert fixed-point to floating-point
// SCVTF/UCVTF (vector, fixed-point): the u bit selects signed (0) vs
// unsigned (1); these reuse the ARM-shared vcvtfx intrinsics.
2212 defm VCVTxs2f : NeonI_N2VCvt_Fx2fp<0, 0b11100, "scvtf",
2213 int_arm_neon_vcvtfxs2fp>;
2214 defm VCVTxu2f : NeonI_N2VCvt_Fx2fp<1, 0b11100, "ucvtf",
2215 int_arm_neon_vcvtfxu2fp>;
2217 // Convert floating-point to fixed-point
// FCVTZS/FCVTZU (vector, fixed-point), same u-bit convention as above.
2218 defm VCVTf2xs : NeonI_N2VCvt_Fp2fx<0, 0b11111, "fcvtzs",
2219 int_arm_neon_vcvtfp2fxs>;
2220 defm VCVTf2xu : NeonI_N2VCvt_Fp2fx<1, 0b11111, "fcvtzu",
2221 int_arm_neon_vcvtfp2fxu>;
2223 multiclass Neon_sshll2_0<SDNode ext>
2225 def _v8i8 : PatFrag<(ops node:$Rn),
2226 (v8i16 (ext (v8i8 (Neon_High16B node:$Rn))))>;
2227 def _v4i16 : PatFrag<(ops node:$Rn),
2228 (v4i32 (ext (v4i16 (Neon_High8H node:$Rn))))>;
2229 def _v2i32 : PatFrag<(ops node:$Rn),
2230 (v2i64 (ext (v2i32 (Neon_High4S node:$Rn))))>;
2233 defm NI_sext_high : Neon_sshll2_0<sext>;
2234 defm NI_zext_high : Neon_sshll2_0<zext>;
2237 //===----------------------------------------------------------------------===//
2238 // Multiclasses for NeonI_Across
2239 //===----------------------------------------------------------------------===//
2243 multiclass NeonI_2VAcross_1<bit u, bits<5> opcode,
2244 string asmop, SDPatternOperator opnode>
2246 def _1h8b: NeonI_2VAcross<0b0, u, 0b00, opcode,
2247 (outs FPR16:$Rd), (ins VPR64:$Rn),
2248 asmop # "\t$Rd, $Rn.8b",
2249 [(set (v1i16 FPR16:$Rd),
2250 (v1i16 (opnode (v8i8 VPR64:$Rn))))],
2253 def _1h16b: NeonI_2VAcross<0b1, u, 0b00, opcode,
2254 (outs FPR16:$Rd), (ins VPR128:$Rn),
2255 asmop # "\t$Rd, $Rn.16b",
2256 [(set (v1i16 FPR16:$Rd),
2257 (v1i16 (opnode (v16i8 VPR128:$Rn))))],
2260 def _1s4h: NeonI_2VAcross<0b0, u, 0b01, opcode,
2261 (outs FPR32:$Rd), (ins VPR64:$Rn),
2262 asmop # "\t$Rd, $Rn.4h",
2263 [(set (v1i32 FPR32:$Rd),
2264 (v1i32 (opnode (v4i16 VPR64:$Rn))))],
2267 def _1s8h: NeonI_2VAcross<0b1, u, 0b01, opcode,
2268 (outs FPR32:$Rd), (ins VPR128:$Rn),
2269 asmop # "\t$Rd, $Rn.8h",
2270 [(set (v1i32 FPR32:$Rd),
2271 (v1i32 (opnode (v8i16 VPR128:$Rn))))],
2274 // _1d2s doesn't exist!
2276 def _1d4s: NeonI_2VAcross<0b1, u, 0b10, opcode,
2277 (outs FPR64:$Rd), (ins VPR128:$Rn),
2278 asmop # "\t$Rd, $Rn.4s",
2279 [(set (v1i64 FPR64:$Rd),
2280 (v1i64 (opnode (v4i32 VPR128:$Rn))))],
2284 defm SADDLV : NeonI_2VAcross_1<0b0, 0b00011, "saddlv", int_aarch64_neon_saddlv>;
2285 defm UADDLV : NeonI_2VAcross_1<0b1, 0b00011, "uaddlv", int_aarch64_neon_uaddlv>;
2289 multiclass NeonI_2VAcross_2<bit u, bits<5> opcode,
2290 string asmop, SDPatternOperator opnode>
2292 def _1b8b: NeonI_2VAcross<0b0, u, 0b00, opcode,
2293 (outs FPR8:$Rd), (ins VPR64:$Rn),
2294 asmop # "\t$Rd, $Rn.8b",
2295 [(set (v1i8 FPR8:$Rd),
2296 (v1i8 (opnode (v8i8 VPR64:$Rn))))],
2299 def _1b16b: NeonI_2VAcross<0b1, u, 0b00, opcode,
2300 (outs FPR8:$Rd), (ins VPR128:$Rn),
2301 asmop # "\t$Rd, $Rn.16b",
2302 [(set (v1i8 FPR8:$Rd),
2303 (v1i8 (opnode (v16i8 VPR128:$Rn))))],
2306 def _1h4h: NeonI_2VAcross<0b0, u, 0b01, opcode,
2307 (outs FPR16:$Rd), (ins VPR64:$Rn),
2308 asmop # "\t$Rd, $Rn.4h",
2309 [(set (v1i16 FPR16:$Rd),
2310 (v1i16 (opnode (v4i16 VPR64:$Rn))))],
2313 def _1h8h: NeonI_2VAcross<0b1, u, 0b01, opcode,
2314 (outs FPR16:$Rd), (ins VPR128:$Rn),
2315 asmop # "\t$Rd, $Rn.8h",
2316 [(set (v1i16 FPR16:$Rd),
2317 (v1i16 (opnode (v8i16 VPR128:$Rn))))],
2320 // _1s2s doesn't exist!
2322 def _1s4s: NeonI_2VAcross<0b1, u, 0b10, opcode,
2323 (outs FPR32:$Rd), (ins VPR128:$Rn),
2324 asmop # "\t$Rd, $Rn.4s",
2325 [(set (v1i32 FPR32:$Rd),
2326 (v1i32 (opnode (v4i32 VPR128:$Rn))))],
2330 defm SMAXV : NeonI_2VAcross_2<0b0, 0b01010, "smaxv", int_aarch64_neon_smaxv>;
2331 defm UMAXV : NeonI_2VAcross_2<0b1, 0b01010, "umaxv", int_aarch64_neon_umaxv>;
2333 defm SMINV : NeonI_2VAcross_2<0b0, 0b11010, "sminv", int_aarch64_neon_sminv>;
2334 defm UMINV : NeonI_2VAcross_2<0b1, 0b11010, "uminv", int_aarch64_neon_uminv>;
2336 defm ADDV : NeonI_2VAcross_2<0b0, 0b11011, "addv", int_aarch64_neon_vaddv>;
2340 multiclass NeonI_2VAcross_3<bit u, bits<5> opcode, bits<2> size,
2341 string asmop, SDPatternOperator opnode> {
2342 def _1s4s: NeonI_2VAcross<0b1, u, size, opcode,
2343 (outs FPR32:$Rd), (ins VPR128:$Rn),
2344 asmop # "\t$Rd, $Rn.4s",
2345 [(set (v1f32 FPR32:$Rd),
2346 (v1f32 (opnode (v4f32 VPR128:$Rn))))],
2350 defm FMAXNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b00, "fmaxnmv",
2351 int_aarch64_neon_vmaxnmv>;
2352 defm FMINNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b10, "fminnmv",
2353 int_aarch64_neon_vminnmv>;
2355 defm FMAXV : NeonI_2VAcross_3<0b1, 0b01111, 0b00, "fmaxv",
2356 int_aarch64_neon_vmaxv>;
2357 defm FMINV : NeonI_2VAcross_3<0b1, 0b01111, 0b10, "fminv",
2358 int_aarch64_neon_vminv>;
2360 // The following are for the instruction class (3V Diff)
2362 // normal long/long2 pattern
2363 class NeonI_3VDL<bit q, bit u, bits<2> size, bits<4> opcode,
2364 string asmop, string ResS, string OpS,
2365 SDPatternOperator opnode, SDPatternOperator ext,
2366 RegisterOperand OpVPR,
2367 ValueType ResTy, ValueType OpTy>
2368 : NeonI_3VDiff<q, u, size, opcode,
2369 (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2370 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2371 [(set (ResTy VPR128:$Rd),
2372 (ResTy (opnode (ResTy (ext (OpTy OpVPR:$Rn))),
2373 (ResTy (ext (OpTy OpVPR:$Rm))))))],
2376 multiclass NeonI_3VDL_s<bit u, bits<4> opcode,
2377 string asmop, SDPatternOperator opnode,
2378 bit Commutable = 0> {
2379 let isCommutable = Commutable in {
2380 def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2381 opnode, sext, VPR64, v8i16, v8i8>;
2382 def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2383 opnode, sext, VPR64, v4i32, v4i16>;
2384 def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2385 opnode, sext, VPR64, v2i64, v2i32>;
2389 multiclass NeonI_3VDL2_s<bit u, bits<4> opcode, string asmop,
2390 SDPatternOperator opnode, bit Commutable = 0> {
2391 let isCommutable = Commutable in {
2392 def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2393 opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>;
2394 def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2395 opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>;
2396 def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2397 opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>;
2401 multiclass NeonI_3VDL_u<bit u, bits<4> opcode, string asmop,
2402 SDPatternOperator opnode, bit Commutable = 0> {
2403 let isCommutable = Commutable in {
2404 def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2405 opnode, zext, VPR64, v8i16, v8i8>;
2406 def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2407 opnode, zext, VPR64, v4i32, v4i16>;
2408 def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2409 opnode, zext, VPR64, v2i64, v2i32>;
2413 multiclass NeonI_3VDL2_u<bit u, bits<4> opcode, string asmop,
2414 SDPatternOperator opnode, bit Commutable = 0> {
2415 let isCommutable = Commutable in {
2416 def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2417 opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>;
2418 def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2419 opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>;
2420 def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2421 opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>;
// Long add/subtract: the trailing argument is the Commutable flag —
// 1 for the add forms, 0 for the sub forms.  The *_s multiclasses
// sign-extend the narrow operands, the *_u multiclasses zero-extend,
// and the *2 variants operate on the high halves of 128-bit sources.
2425 defm SADDLvvv : NeonI_3VDL_s<0b0, 0b0000, "saddl", add, 1>;
2426 defm UADDLvvv : NeonI_3VDL_u<0b1, 0b0000, "uaddl", add, 1>;
2428 defm SADDL2vvv : NeonI_3VDL2_s<0b0, 0b0000, "saddl2", add, 1>;
2429 defm UADDL2vvv : NeonI_3VDL2_u<0b1, 0b0000, "uaddl2", add, 1>;
2431 defm SSUBLvvv : NeonI_3VDL_s<0b0, 0b0010, "ssubl", sub, 0>;
2432 defm USUBLvvv : NeonI_3VDL_u<0b1, 0b0010, "usubl", sub, 0>;
2434 defm SSUBL2vvv : NeonI_3VDL2_s<0b0, 0b0010, "ssubl2", sub, 0>;
2435 defm USUBL2vvv : NeonI_3VDL2_u<0b1, 0b0010, "usubl2", sub, 0>;
2437 // normal wide/wide2 pattern
2438 class NeonI_3VDW<bit q, bit u, bits<2> size, bits<4> opcode,
2439 string asmop, string ResS, string OpS,
2440 SDPatternOperator opnode, SDPatternOperator ext,
2441 RegisterOperand OpVPR,
2442 ValueType ResTy, ValueType OpTy>
2443 : NeonI_3VDiff<q, u, size, opcode,
2444 (outs VPR128:$Rd), (ins VPR128:$Rn, OpVPR:$Rm),
2445 asmop # "\t$Rd." # ResS # ", $Rn." # ResS # ", $Rm." # OpS,
2446 [(set (ResTy VPR128:$Rd),
2447 (ResTy (opnode (ResTy VPR128:$Rn),
2448 (ResTy (ext (OpTy OpVPR:$Rm))))))],
2451 multiclass NeonI_3VDW_s<bit u, bits<4> opcode, string asmop,
2452 SDPatternOperator opnode> {
2453 def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2454 opnode, sext, VPR64, v8i16, v8i8>;
2455 def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2456 opnode, sext, VPR64, v4i32, v4i16>;
2457 def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2458 opnode, sext, VPR64, v2i64, v2i32>;
2461 defm SADDWvvv : NeonI_3VDW_s<0b0, 0b0001, "saddw", add>;
2462 defm SSUBWvvv : NeonI_3VDW_s<0b0, 0b0011, "ssubw", sub>;
2464 multiclass NeonI_3VDW2_s<bit u, bits<4> opcode, string asmop,
2465 SDPatternOperator opnode> {
2466 def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2467 opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>;
2468 def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2469 opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>;
2470 def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2471 opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>;
2474 defm SADDW2vvv : NeonI_3VDW2_s<0b0, 0b0001, "saddw2", add>;
2475 defm SSUBW2vvv : NeonI_3VDW2_s<0b0, 0b0011, "ssubw2", sub>;
2477 multiclass NeonI_3VDW_u<bit u, bits<4> opcode, string asmop,
2478 SDPatternOperator opnode> {
2479 def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2480 opnode, zext, VPR64, v8i16, v8i8>;
2481 def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2482 opnode, zext, VPR64, v4i32, v4i16>;
2483 def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2484 opnode, zext, VPR64, v2i64, v2i32>;
2487 defm UADDWvvv : NeonI_3VDW_u<0b1, 0b0001, "uaddw", add>;
2488 defm USUBWvvv : NeonI_3VDW_u<0b1, 0b0011, "usubw", sub>;
2490 multiclass NeonI_3VDW2_u<bit u, bits<4> opcode, string asmop,
2491 SDPatternOperator opnode> {
2492 def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2493 opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>;
2494 def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2495 opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>;
2496 def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2497 opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>;
2500 defm UADDW2vvv : NeonI_3VDW2_u<0b1, 0b0001, "uaddw2", add>;
2501 defm USUBW2vvv : NeonI_3VDW2_u<0b1, 0b0011, "usubw2", sub>;
2503 // Get the high half part of the vector element.
2504 multiclass NeonI_get_high {
2505 def _8h : PatFrag<(ops node:$Rn),
2506 (v8i8 (trunc (v8i16 (srl (v8i16 node:$Rn),
2507 (v8i16 (Neon_vdup (i32 8)))))))>;
2508 def _4s : PatFrag<(ops node:$Rn),
2509 (v4i16 (trunc (v4i32 (srl (v4i32 node:$Rn),
2510 (v4i32 (Neon_vdup (i32 16)))))))>;
2511 def _2d : PatFrag<(ops node:$Rn),
2512 (v2i32 (trunc (v2i64 (srl (v2i64 node:$Rn),
2513 (v2i64 (Neon_vdup (i32 32)))))))>;
2516 defm NI_get_hi : NeonI_get_high;
2518 // pattern for addhn/subhn with 2 operands
2519 class NeonI_3VDN_addhn_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
2520 string asmop, string ResS, string OpS,
2521 SDPatternOperator opnode, SDPatternOperator get_hi,
2522 ValueType ResTy, ValueType OpTy>
2523 : NeonI_3VDiff<q, u, size, opcode,
2524 (outs VPR64:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
2525 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2526 [(set (ResTy VPR64:$Rd),
2528 (OpTy (opnode (OpTy VPR128:$Rn),
2529 (OpTy VPR128:$Rm))))))],
2532 multiclass NeonI_3VDN_addhn_2Op<bit u, bits<4> opcode, string asmop,
2533 SDPatternOperator opnode, bit Commutable = 0> {
2534 let isCommutable = Commutable in {
2535 def _8b8h : NeonI_3VDN_addhn_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h",
2536 opnode, NI_get_hi_8h, v8i8, v8i16>;
2537 def _4h4s : NeonI_3VDN_addhn_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s",
2538 opnode, NI_get_hi_4s, v4i16, v4i32>;
2539 def _2s2d : NeonI_3VDN_addhn_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d",
2540 opnode, NI_get_hi_2d, v2i32, v2i64>;
2544 defm ADDHNvvv : NeonI_3VDN_addhn_2Op<0b0, 0b0100, "addhn", add, 1>;
2545 defm SUBHNvvv : NeonI_3VDN_addhn_2Op<0b0, 0b0110, "subhn", sub, 0>;
2547 // pattern for operation with 2 operands
2548 class NeonI_3VD_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
2549 string asmop, string ResS, string OpS,
2550 SDPatternOperator opnode,
2551 RegisterOperand ResVPR, RegisterOperand OpVPR,
2552 ValueType ResTy, ValueType OpTy>
2553 : NeonI_3VDiff<q, u, size, opcode,
2554 (outs ResVPR:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2555 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2556 [(set (ResTy ResVPR:$Rd),
2557 (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))],
2560 // normal narrow pattern
2561 multiclass NeonI_3VDN_2Op<bit u, bits<4> opcode, string asmop,
2562 SDPatternOperator opnode, bit Commutable = 0> {
2563 let isCommutable = Commutable in {
2564 def _8b8h : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h",
2565 opnode, VPR64, VPR128, v8i8, v8i16>;
2566 def _4h4s : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s",
2567 opnode, VPR64, VPR128, v4i16, v4i32>;
2568 def _2s2d : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d",
2569 opnode, VPR64, VPR128, v2i32, v2i64>;
2573 defm RADDHNvvv : NeonI_3VDN_2Op<0b1, 0b0100, "raddhn", int_arm_neon_vraddhn, 1>;
2574 defm RSUBHNvvv : NeonI_3VDN_2Op<0b1, 0b0110, "rsubhn", int_arm_neon_vrsubhn, 0>;
2576 // pattern for ACLE intrinsics with 3 operands
2577 class NeonI_3VDN_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
2578 string asmop, string ResS, string OpS>
2579 : NeonI_3VDiff<q, u, size, opcode,
2580 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn, VPR128:$Rm),
2581 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2583 let Constraints = "$src = $Rd";
2584 let neverHasSideEffects = 1;
2587 multiclass NeonI_3VDN_3Op_v1<bit u, bits<4> opcode, string asmop> {
2588 def _16b8h : NeonI_3VDN_3Op<0b1, u, 0b00, opcode, asmop, "16b", "8h">;
2589 def _8h4s : NeonI_3VDN_3Op<0b1, u, 0b01, opcode, asmop, "8h", "4s">;
2590 def _4s2d : NeonI_3VDN_3Op<0b1, u, 0b10, opcode, asmop, "4s", "2d">;
2593 defm ADDHN2vvv : NeonI_3VDN_3Op_v1<0b0, 0b0100, "addhn2">;
2594 defm SUBHN2vvv : NeonI_3VDN_3Op_v1<0b0, 0b0110, "subhn2">;
2596 defm RADDHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0100, "raddhn2">;
2597 defm RSUBHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0110, "rsubhn2">;
2599 // Patterns have to be separate because there's a SUBREG_TO_REG in the output
// NarrowHighHalfPat: matches building a 128-bit vector whose low half is
// the existing 64-bit $src and whose high half is the narrowed result of
// coreop on two 128-bit operands — i.e. the "2" (high-half write) form of
// a narrowing instruction.  SUBREG_TO_REG places the 64-bit $src into the
// low half of a 128-bit register to form the tied destination operand.
2601 class NarrowHighHalfPat<Instruction INST, ValueType DstTy, ValueType SrcTy,
2602 SDPatternOperator coreop>
2603 : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
2604 (v1i64 (bitconvert (DstTy (coreop (SrcTy VPR128:$Rn),
2605 (SrcTy VPR128:$Rm)))))),
2606 (INST (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2607 VPR128:$Rn, VPR128:$Rm)>;
// ADDHN2: high-half narrow of a plain add (core op built from add + the
// matching NI_get_hi_* high-half extraction fragment).
2610 def : NarrowHighHalfPat<ADDHN2vvv_16b8h, v8i8, v8i16,
2611 BinOpFrag<(NI_get_hi_8h (add node:$LHS, node:$RHS))>>;
2612 def : NarrowHighHalfPat<ADDHN2vvv_8h4s, v4i16, v4i32,
2613 BinOpFrag<(NI_get_hi_4s (add node:$LHS, node:$RHS))>>;
2614 def : NarrowHighHalfPat<ADDHN2vvv_4s2d, v2i32, v2i64,
2615 BinOpFrag<(NI_get_hi_2d (add node:$LHS, node:$RHS))>>;
// SUBHN2: same structure with sub as the core operation.
2618 def : NarrowHighHalfPat<SUBHN2vvv_16b8h, v8i8, v8i16,
2619 BinOpFrag<(NI_get_hi_8h (sub node:$LHS, node:$RHS))>>;
2620 def : NarrowHighHalfPat<SUBHN2vvv_8h4s, v4i16, v4i32,
2621 BinOpFrag<(NI_get_hi_4s (sub node:$LHS, node:$RHS))>>;
2622 def : NarrowHighHalfPat<SUBHN2vvv_4s2d, v2i32, v2i64,
2623 BinOpFrag<(NI_get_hi_2d (sub node:$LHS, node:$RHS))>>;
// RADDHN2/RSUBHN2: rounding forms come straight from the ARM-shared
// vraddhn/vrsubhn intrinsics rather than a BinOpFrag.
2626 def : NarrowHighHalfPat<RADDHN2vvv_16b8h, v8i8, v8i16, int_arm_neon_vraddhn>;
2627 def : NarrowHighHalfPat<RADDHN2vvv_8h4s, v4i16, v4i32, int_arm_neon_vraddhn>;
2628 def : NarrowHighHalfPat<RADDHN2vvv_4s2d, v2i32, v2i64, int_arm_neon_vraddhn>;
2631 def : NarrowHighHalfPat<RSUBHN2vvv_16b8h, v8i8, v8i16, int_arm_neon_vrsubhn>;
2632 def : NarrowHighHalfPat<RSUBHN2vvv_8h4s, v4i16, v4i32, int_arm_neon_vrsubhn>;
2633 def : NarrowHighHalfPat<RSUBHN2vvv_4s2d, v2i32, v2i64, int_arm_neon_vrsubhn>;
2635 // patterns that need to zero-extend the result
2636 class NeonI_3VDL_Ext<bit q, bit u, bits<2> size, bits<4> opcode,
2637 string asmop, string ResS, string OpS,
2638 SDPatternOperator opnode,
2639 RegisterOperand OpVPR,
2640 ValueType ResTy, ValueType OpTy, ValueType OpSTy>
2641 : NeonI_3VDiff<q, u, size, opcode,
2642 (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2643 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2644 [(set (ResTy VPR128:$Rd),
2645 (ResTy (zext (OpSTy (opnode (OpTy OpVPR:$Rn),
2646 (OpTy OpVPR:$Rm))))))],
2649 multiclass NeonI_3VDL_zext<bit u, bits<4> opcode, string asmop,
2650 SDPatternOperator opnode, bit Commutable = 0> {
2651 let isCommutable = Commutable in {
2652 def _8h8b : NeonI_3VDL_Ext<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2653 opnode, VPR64, v8i16, v8i8, v8i8>;
2654 def _4s4h : NeonI_3VDL_Ext<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2655 opnode, VPR64, v4i32, v4i16, v4i16>;
2656 def _2d2s : NeonI_3VDL_Ext<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2657 opnode, VPR64, v2i64, v2i32, v2i32>;
2661 defm SABDLvvv : NeonI_3VDL_zext<0b0, 0b0111, "sabdl", int_arm_neon_vabds, 1>;
2662 defm UABDLvvv : NeonI_3VDL_zext<0b1, 0b0111, "uabdl", int_arm_neon_vabdu, 1>;
2664 multiclass NeonI_Op_High<SDPatternOperator op> {
2665 def _16B : PatFrag<(ops node:$Rn, node:$Rm),
2666 (op (v8i8 (Neon_High16B node:$Rn)),
2667 (v8i8 (Neon_High16B node:$Rm)))>;
2668 def _8H : PatFrag<(ops node:$Rn, node:$Rm),
2669 (op (v4i16 (Neon_High8H node:$Rn)),
2670 (v4i16 (Neon_High8H node:$Rm)))>;
2671 def _4S : PatFrag<(ops node:$Rn, node:$Rm),
2672 (op (v2i32 (Neon_High4S node:$Rn)),
2673 (v2i32 (Neon_High4S node:$Rm)))>;
2676 defm NI_sabdl_hi : NeonI_Op_High<int_arm_neon_vabds>;
2677 defm NI_uabdl_hi : NeonI_Op_High<int_arm_neon_vabdu>;
2678 defm NI_smull_hi : NeonI_Op_High<int_arm_neon_vmulls>;
2679 defm NI_umull_hi : NeonI_Op_High<int_arm_neon_vmullu>;
2680 defm NI_qdmull_hi : NeonI_Op_High<int_arm_neon_vqdmull>;
2681 defm NI_pmull_hi : NeonI_Op_High<int_arm_neon_vmullp>;
2683 multiclass NeonI_3VDL_Abd_u<bit u, bits<4> opcode, string asmop, string opnode,
2684 bit Commutable = 0> {
2685 let isCommutable = Commutable in {
2686 def _8h8b : NeonI_3VDL_Ext<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2687 !cast<PatFrag>(opnode # "_16B"),
2688 VPR128, v8i16, v16i8, v8i8>;
2689 def _4s4h : NeonI_3VDL_Ext<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2690 !cast<PatFrag>(opnode # "_8H"),
2691 VPR128, v4i32, v8i16, v4i16>;
2692 def _2d2s : NeonI_3VDL_Ext<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2693 !cast<PatFrag>(opnode # "_4S"),
2694 VPR128, v2i64, v4i32, v2i32>;
2698 defm SABDL2vvv : NeonI_3VDL_Abd_u<0b0, 0b0111, "sabdl2", "NI_sabdl_hi", 1>;
2699 defm UABDL2vvv : NeonI_3VDL_Abd_u<0b1, 0b0111, "uabdl2", "NI_uabdl_hi", 1>;
2701 // For pattern that need two operators being chained.
2702 class NeonI_3VDL_Aba<bit q, bit u, bits<2> size, bits<4> opcode,
2703 string asmop, string ResS, string OpS,
2704 SDPatternOperator opnode, SDPatternOperator subop,
2705 RegisterOperand OpVPR,
2706 ValueType ResTy, ValueType OpTy, ValueType OpSTy>
2707 : NeonI_3VDiff<q, u, size, opcode,
2708 (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
2709 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2710 [(set (ResTy VPR128:$Rd),
2712 (ResTy VPR128:$src),
2713 (ResTy (zext (OpSTy (subop (OpTy OpVPR:$Rn),
2714 (OpTy OpVPR:$Rm))))))))],
2716 let Constraints = "$src = $Rd";
2719 multiclass NeonI_3VDL_Aba_v1<bit u, bits<4> opcode, string asmop,
2720 SDPatternOperator opnode, SDPatternOperator subop>{
2721 def _8h8b : NeonI_3VDL_Aba<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2722 opnode, subop, VPR64, v8i16, v8i8, v8i8>;
2723 def _4s4h : NeonI_3VDL_Aba<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2724 opnode, subop, VPR64, v4i32, v4i16, v4i16>;
2725 def _2d2s : NeonI_3VDL_Aba<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2726 opnode, subop, VPR64, v2i64, v2i32, v2i32>;
2729 defm SABALvvv : NeonI_3VDL_Aba_v1<0b0, 0b0101, "sabal",
2730 add, int_arm_neon_vabds>;
2731 defm UABALvvv : NeonI_3VDL_Aba_v1<0b1, 0b0101, "uabal",
2732 add, int_arm_neon_vabdu>;
2734 multiclass NeonI_3VDL2_Aba_v1<bit u, bits<4> opcode, string asmop,
2735 SDPatternOperator opnode, string subop> {
2736 def _8h8b : NeonI_3VDL_Aba<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2737 opnode, !cast<PatFrag>(subop # "_16B"),
2738 VPR128, v8i16, v16i8, v8i8>;
2739 def _4s4h : NeonI_3VDL_Aba<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2740 opnode, !cast<PatFrag>(subop # "_8H"),
2741 VPR128, v4i32, v8i16, v4i16>;
2742 def _2d2s : NeonI_3VDL_Aba<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2743 opnode, !cast<PatFrag>(subop # "_4S"),
2744 VPR128, v2i64, v4i32, v2i32>;
2747 defm SABAL2vvv : NeonI_3VDL2_Aba_v1<0b0, 0b0101, "sabal2", add,
2749 defm UABAL2vvv : NeonI_3VDL2_Aba_v1<0b1, 0b0101, "uabal2", add,
2752 // Long pattern with 2 operands
2753 multiclass NeonI_3VDL_2Op<bit u, bits<4> opcode, string asmop,
2754 SDPatternOperator opnode, bit Commutable = 0> {
2755 let isCommutable = Commutable in {
2756 def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2757 opnode, VPR128, VPR64, v8i16, v8i8>;
2758 def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2759 opnode, VPR128, VPR64, v4i32, v4i16>;
2760 def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2761 opnode, VPR128, VPR64, v2i64, v2i32>;
2765 defm SMULLvvv : NeonI_3VDL_2Op<0b0, 0b1100, "smull", int_arm_neon_vmulls, 1>;
2766 defm UMULLvvv : NeonI_3VDL_2Op<0b1, 0b1100, "umull", int_arm_neon_vmullu, 1>;
2768 class NeonI_3VDL2_2Op_mull<bit q, bit u, bits<2> size, bits<4> opcode,
2769 string asmop, string ResS, string OpS,
2770 SDPatternOperator opnode,
2771 ValueType ResTy, ValueType OpTy>
2772 : NeonI_3VDiff<q, u, size, opcode,
2773 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
2774 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2775 [(set (ResTy VPR128:$Rd),
2776 (ResTy (opnode (OpTy VPR128:$Rn), (OpTy VPR128:$Rm))))],
2779 multiclass NeonI_3VDL2_2Op_mull_v1<bit u, bits<4> opcode, string asmop,
2780 string opnode, bit Commutable = 0> {
2781 let isCommutable = Commutable in {
2782 def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2783 !cast<PatFrag>(opnode # "_16B"),
2785 def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2786 !cast<PatFrag>(opnode # "_8H"),
2788 def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2789 !cast<PatFrag>(opnode # "_4S"),
2794 defm SMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b0, 0b1100, "smull2",
2796 defm UMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b1, 0b1100, "umull2",
2799 // Long pattern with 3 operands
2800 class NeonI_3VDL_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
2801 string asmop, string ResS, string OpS,
2802 SDPatternOperator opnode,
2803 ValueType ResTy, ValueType OpTy>
2804 : NeonI_3VDiff<q, u, size, opcode,
2805 (outs VPR128:$Rd), (ins VPR128:$src, VPR64:$Rn, VPR64:$Rm),
2806 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2807 [(set (ResTy VPR128:$Rd),
2809 (ResTy VPR128:$src),
2810 (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))))],
2812 let Constraints = "$src = $Rd";
2815 multiclass NeonI_3VDL_3Op_v1<bit u, bits<4> opcode, string asmop,
2816 SDPatternOperator opnode> {
2817 def _8h8b : NeonI_3VDL_3Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2818 opnode, v8i16, v8i8>;
2819 def _4s4h : NeonI_3VDL_3Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2820 opnode, v4i32, v4i16>;
2821 def _2d2s : NeonI_3VDL_3Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2822 opnode, v2i64, v2i32>;
2825 def Neon_smlal : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
2827 (int_arm_neon_vmulls node:$Rn, node:$Rm))>;
2829 def Neon_umlal : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
2831 (int_arm_neon_vmullu node:$Rn, node:$Rm))>;
2833 def Neon_smlsl : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
2835 (int_arm_neon_vmulls node:$Rn, node:$Rm))>;
2837 def Neon_umlsl : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
2839 (int_arm_neon_vmullu node:$Rn, node:$Rm))>;
2841 defm SMLALvvv : NeonI_3VDL_3Op_v1<0b0, 0b1000, "smlal", Neon_smlal>;
2842 defm UMLALvvv : NeonI_3VDL_3Op_v1<0b1, 0b1000, "umlal", Neon_umlal>;
2844 defm SMLSLvvv : NeonI_3VDL_3Op_v1<0b0, 0b1010, "smlsl", Neon_smlsl>;
2845 defm UMLSLvvv : NeonI_3VDL_3Op_v1<0b1, 0b1010, "umlsl", Neon_umlsl>;
2847 class NeonI_3VDL2_3Op_mlas<bit q, bit u, bits<2> size, bits<4> opcode,
2848 string asmop, string ResS, string OpS,
2849 SDPatternOperator subop, SDPatternOperator opnode,
2850 RegisterOperand OpVPR,
2851 ValueType ResTy, ValueType OpTy>
2852 : NeonI_3VDiff<q, u, size, opcode,
2853 (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
2854 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2855 [(set (ResTy VPR128:$Rd),
2857 (ResTy VPR128:$src),
2858 (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))))],
2860 let Constraints = "$src = $Rd";
2863 multiclass NeonI_3VDL2_3Op_mlas_v1<bit u, bits<4> opcode, string asmop,
2864 SDPatternOperator subop, string opnode> {
2865 def _8h16b : NeonI_3VDL2_3Op_mlas<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2866 subop, !cast<PatFrag>(opnode # "_16B"),
2867 VPR128, v8i16, v16i8>;
2868 def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2869 subop, !cast<PatFrag>(opnode # "_8H"),
2870 VPR128, v4i32, v8i16>;
2871 def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2872 subop, !cast<PatFrag>(opnode # "_4S"),
2873 VPR128, v2i64, v4i32>;
2876 defm SMLAL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1000, "smlal2",
2877 add, "NI_smull_hi">;
2878 defm UMLAL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1000, "umlal2",
2879 add, "NI_umull_hi">;
2881 defm SMLSL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1010, "smlsl2",
2882 sub, "NI_smull_hi">;
2883 defm UMLSL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1010, "umlsl2",
2884 sub, "NI_umull_hi">;
2886 multiclass NeonI_3VDL_qdmlal_3Op_v2<bit u, bits<4> opcode, string asmop,
2887 SDPatternOperator opnode> {
2888 def _4s4h : NeonI_3VDL2_3Op_mlas<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2889 opnode, int_arm_neon_vqdmull,
2890 VPR64, v4i32, v4i16>;
2891 def _2d2s : NeonI_3VDL2_3Op_mlas<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2892 opnode, int_arm_neon_vqdmull,
2893 VPR64, v2i64, v2i32>;
2896 defm SQDMLALvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1001, "sqdmlal",
2897 int_arm_neon_vqadds>;
2898 defm SQDMLSLvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1011, "sqdmlsl",
2899 int_arm_neon_vqsubs>;
2901 multiclass NeonI_3VDL_v2<bit u, bits<4> opcode, string asmop,
2902 SDPatternOperator opnode, bit Commutable = 0> {
2903 let isCommutable = Commutable in {
2904 def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2905 opnode, VPR128, VPR64, v4i32, v4i16>;
2906 def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2907 opnode, VPR128, VPR64, v2i64, v2i32>;
2911 defm SQDMULLvvv : NeonI_3VDL_v2<0b0, 0b1101, "sqdmull",
2912 int_arm_neon_vqdmull, 1>;
2914 multiclass NeonI_3VDL2_2Op_mull_v2<bit u, bits<4> opcode, string asmop,
2915 string opnode, bit Commutable = 0> {
2916 let isCommutable = Commutable in {
2917 def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2918 !cast<PatFrag>(opnode # "_8H"),
2920 def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2921 !cast<PatFrag>(opnode # "_4S"),
2926 defm SQDMULL2vvv : NeonI_3VDL2_2Op_mull_v2<0b0, 0b1101, "sqdmull2",
2929 multiclass NeonI_3VDL2_3Op_qdmlal_v2<bit u, bits<4> opcode, string asmop,
2930 SDPatternOperator opnode> {
2931 def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2932 opnode, NI_qdmull_hi_8H,
2933 VPR128, v4i32, v8i16>;
2934 def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2935 opnode, NI_qdmull_hi_4S,
2936 VPR128, v2i64, v4i32>;
2939 defm SQDMLAL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1001, "sqdmlal2",
2940 int_arm_neon_vqadds>;
2941 defm SQDMLSL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1011, "sqdmlsl2",
2942 int_arm_neon_vqsubs>;
2944 multiclass NeonI_3VDL_v3<bit u, bits<4> opcode, string asmop,
2945 SDPatternOperator opnode, bit Commutable = 0> {
2946 let isCommutable = Commutable in {
2947 def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2948 opnode, VPR128, VPR64, v8i16, v8i8>;
2952 defm PMULLvvv : NeonI_3VDL_v3<0b0, 0b1110, "pmull", int_arm_neon_vmullp, 1>;
2954 multiclass NeonI_3VDL2_2Op_mull_v3<bit u, bits<4> opcode, string asmop,
2955 string opnode, bit Commutable = 0> {
2956 let isCommutable = Commutable in {
2957 def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2958 !cast<PatFrag>(opnode # "_16B"),
2963 defm PMULL2vvv : NeonI_3VDL2_2Op_mull_v3<0b0, 0b1110, "pmull2", "NI_pmull_hi",
2966 // End of implementation for instruction class (3V Diff)
2968 // The followings are vector load/store multiple N-element structure
2969 // (class SIMD lselem).
2971 // ld1: load multiple 1-element structure to 1/2/3/4 registers.
2972 // ld2/ld3/ld4: load multiple N-element structure to N registers (N = 2, 3, 4).
2973 // The structure consists of a sequence of sets of N values.
2974 // The first element of the structure is placed in the first lane
2975 // of the first first vector, the second element in the first lane
2976 // of the second vector, and so on.
2977 // E.g. LD1_3V_2S will load 32-bit elements {A, B, C, D, E, F} sequentially into
2978 // the three 64-bit vectors list {BA, DC, FE}.
2979 // E.g. LD3_2S will load 32-bit elements {A, B, C, D, E, F} into the three
2980 // 64-bit vectors list {DA, EB, FC}.
2981 // Store instructions store multiple structure to N registers like load.
2984 class NeonI_LDVList<bit q, bits<4> opcode, bits<2> size,
2985 RegisterOperand VecList, string asmop>
2986 : NeonI_LdStMult<q, 1, opcode, size,
2987 (outs VecList:$Rt), (ins GPR64xsp:$Rn),
2988 asmop # "\t$Rt, [$Rn]",
2992 let neverHasSideEffects = 1;
2995 multiclass LDVList_BHSD<bits<4> opcode, string List, string asmop> {
2996 def _8B : NeonI_LDVList<0, opcode, 0b00,
2997 !cast<RegisterOperand>(List # "8B_operand"), asmop>;
2999 def _4H : NeonI_LDVList<0, opcode, 0b01,
3000 !cast<RegisterOperand>(List # "4H_operand"), asmop>;
3002 def _2S : NeonI_LDVList<0, opcode, 0b10,
3003 !cast<RegisterOperand>(List # "2S_operand"), asmop>;
3005 def _16B : NeonI_LDVList<1, opcode, 0b00,
3006 !cast<RegisterOperand>(List # "16B_operand"), asmop>;
3008 def _8H : NeonI_LDVList<1, opcode, 0b01,
3009 !cast<RegisterOperand>(List # "8H_operand"), asmop>;
3011 def _4S : NeonI_LDVList<1, opcode, 0b10,
3012 !cast<RegisterOperand>(List # "4S_operand"), asmop>;
3014 def _2D : NeonI_LDVList<1, opcode, 0b11,
3015 !cast<RegisterOperand>(List # "2D_operand"), asmop>;
3018 // Load multiple N-element structure to N consecutive registers (N = 1,2,3,4)
3019 defm LD1 : LDVList_BHSD<0b0111, "VOne", "ld1">;
3020 def LD1_1D : NeonI_LDVList<0, 0b0111, 0b11, VOne1D_operand, "ld1">;
3022 defm LD2 : LDVList_BHSD<0b1000, "VPair", "ld2">;
3024 defm LD3 : LDVList_BHSD<0b0100, "VTriple", "ld3">;
3026 defm LD4 : LDVList_BHSD<0b0000, "VQuad", "ld4">;
3028 // Load multiple 1-element structure to N consecutive registers (N = 2,3,4)
3029 defm LD1_2V : LDVList_BHSD<0b1010, "VPair", "ld1">;
3030 def LD1_2V_1D : NeonI_LDVList<0, 0b1010, 0b11, VPair1D_operand, "ld1">;
3032 defm LD1_3V : LDVList_BHSD<0b0110, "VTriple", "ld1">;
3033 def LD1_3V_1D : NeonI_LDVList<0, 0b0110, 0b11, VTriple1D_operand, "ld1">;
3035 defm LD1_4V : LDVList_BHSD<0b0010, "VQuad", "ld1">;
3036 def LD1_4V_1D : NeonI_LDVList<0, 0b0010, 0b11, VQuad1D_operand, "ld1">;
3038 class NeonI_STVList<bit q, bits<4> opcode, bits<2> size,
3039 RegisterOperand VecList, string asmop>
3040 : NeonI_LdStMult<q, 0, opcode, size,
3041 (outs), (ins GPR64xsp:$Rn, VecList:$Rt),
3042 asmop # "\t$Rt, [$Rn]",
3046 let neverHasSideEffects = 1;
3049 multiclass STVList_BHSD<bits<4> opcode, string List, string asmop> {
3050 def _8B : NeonI_STVList<0, opcode, 0b00,
3051 !cast<RegisterOperand>(List # "8B_operand"), asmop>;
3053 def _4H : NeonI_STVList<0, opcode, 0b01,
3054 !cast<RegisterOperand>(List # "4H_operand"), asmop>;
3056 def _2S : NeonI_STVList<0, opcode, 0b10,
3057 !cast<RegisterOperand>(List # "2S_operand"), asmop>;
3059 def _16B : NeonI_STVList<1, opcode, 0b00,
3060 !cast<RegisterOperand>(List # "16B_operand"), asmop>;
3062 def _8H : NeonI_STVList<1, opcode, 0b01,
3063 !cast<RegisterOperand>(List # "8H_operand"), asmop>;
3065 def _4S : NeonI_STVList<1, opcode, 0b10,
3066 !cast<RegisterOperand>(List # "4S_operand"), asmop>;
3068 def _2D : NeonI_STVList<1, opcode, 0b11,
3069 !cast<RegisterOperand>(List # "2D_operand"), asmop>;
3072 // Store multiple N-element structures from N registers (N = 1,2,3,4)
3073 defm ST1 : STVList_BHSD<0b0111, "VOne", "st1">;
3074 def ST1_1D : NeonI_STVList<0, 0b0111, 0b11, VOne1D_operand, "st1">;
3076 defm ST2 : STVList_BHSD<0b1000, "VPair", "st2">;
3078 defm ST3 : STVList_BHSD<0b0100, "VTriple", "st3">;
3080 defm ST4 : STVList_BHSD<0b0000, "VQuad", "st4">;
3082 // Store multiple 1-element structures from N consecutive registers (N = 2,3,4)
3083 defm ST1_2V : STVList_BHSD<0b1010, "VPair", "st1">;
3084 def ST1_2V_1D : NeonI_STVList<0, 0b1010, 0b11, VPair1D_operand, "st1">;
3086 defm ST1_3V : STVList_BHSD<0b0110, "VTriple", "st1">;
3087 def ST1_3V_1D : NeonI_STVList<0, 0b0110, 0b11, VTriple1D_operand, "st1">;
3089 defm ST1_4V : STVList_BHSD<0b0010, "VQuad", "st1">;
3090 def ST1_4V_1D : NeonI_STVList<0, 0b0010, 0b11, VQuad1D_operand, "st1">;
3092 // End of vector load/store multiple N-element structure(class SIMD lselem)
3094 // The followings are post-index vector load/store multiple N-element
3095 // structure(class SIMD lselem-post)
3096 def exact8_asmoperand : AsmOperandClass {
3097 let Name = "Exact8";
3098 let PredicateMethod = "isExactImm<8>";
3099 let RenderMethod = "addImmOperands";
3101 def uimm_exact8 : Operand<i32>, ImmLeaf<i32, [{return Imm == 8;}]> {
3102 let ParserMatchClass = exact8_asmoperand;
3105 def exact16_asmoperand : AsmOperandClass {
3106 let Name = "Exact16";
3107 let PredicateMethod = "isExactImm<16>";
3108 let RenderMethod = "addImmOperands";
3110 def uimm_exact16 : Operand<i32>, ImmLeaf<i32, [{return Imm == 16;}]> {
3111 let ParserMatchClass = exact16_asmoperand;
3114 def exact24_asmoperand : AsmOperandClass {
3115 let Name = "Exact24";
3116 let PredicateMethod = "isExactImm<24>";
3117 let RenderMethod = "addImmOperands";
3119 def uimm_exact24 : Operand<i32>, ImmLeaf<i32, [{return Imm == 24;}]> {
3120 let ParserMatchClass = exact24_asmoperand;
3123 def exact32_asmoperand : AsmOperandClass {
3124 let Name = "Exact32";
3125 let PredicateMethod = "isExactImm<32>";
3126 let RenderMethod = "addImmOperands";
3128 def uimm_exact32 : Operand<i32>, ImmLeaf<i32, [{return Imm == 32;}]> {
3129 let ParserMatchClass = exact32_asmoperand;
3132 def exact48_asmoperand : AsmOperandClass {
3133 let Name = "Exact48";
3134 let PredicateMethod = "isExactImm<48>";
3135 let RenderMethod = "addImmOperands";
3137 def uimm_exact48 : Operand<i32>, ImmLeaf<i32, [{return Imm == 48;}]> {
3138 let ParserMatchClass = exact48_asmoperand;
3141 def exact64_asmoperand : AsmOperandClass {
3142 let Name = "Exact64";
3143 let PredicateMethod = "isExactImm<64>";
3144 let RenderMethod = "addImmOperands";
3146 def uimm_exact64 : Operand<i32>, ImmLeaf<i32, [{return Imm == 64;}]> {
3147 let ParserMatchClass = exact64_asmoperand;
3150 multiclass NeonI_LDWB_VList<bit q, bits<4> opcode, bits<2> size,
3151 RegisterOperand VecList, Operand ImmTy,
3153 let Constraints = "$Rn = $wb", mayLoad = 1, neverHasSideEffects = 1,
3154 DecoderMethod = "DecodeVLDSTPostInstruction" in {
3155 def _fixed : NeonI_LdStMult_Post<q, 1, opcode, size,
3156 (outs VecList:$Rt, GPR64xsp:$wb),
3157 (ins GPR64xsp:$Rn, ImmTy:$amt),
3158 asmop # "\t$Rt, [$Rn], $amt",
3164 def _register : NeonI_LdStMult_Post<q, 1, opcode, size,
3165 (outs VecList:$Rt, GPR64xsp:$wb),
3166 (ins GPR64xsp:$Rn, GPR64noxzr:$Rm),
3167 asmop # "\t$Rt, [$Rn], $Rm",
3173 multiclass LDWB_VList_BHSD<bits<4> opcode, string List, Operand ImmTy,
3174 Operand ImmTy2, string asmop> {
3175 defm _8B : NeonI_LDWB_VList<0, opcode, 0b00,
3176 !cast<RegisterOperand>(List # "8B_operand"),
3179 defm _4H : NeonI_LDWB_VList<0, opcode, 0b01,
3180 !cast<RegisterOperand>(List # "4H_operand"),
3183 defm _2S : NeonI_LDWB_VList<0, opcode, 0b10,
3184 !cast<RegisterOperand>(List # "2S_operand"),
3187 defm _16B : NeonI_LDWB_VList<1, opcode, 0b00,
3188 !cast<RegisterOperand>(List # "16B_operand"),
3191 defm _8H : NeonI_LDWB_VList<1, opcode, 0b01,
3192 !cast<RegisterOperand>(List # "8H_operand"),
3195 defm _4S : NeonI_LDWB_VList<1, opcode, 0b10,
3196 !cast<RegisterOperand>(List # "4S_operand"),
3199 defm _2D : NeonI_LDWB_VList<1, opcode, 0b11,
3200 !cast<RegisterOperand>(List # "2D_operand"),
3204 // Post-index load multiple N-element structures from N registers (N = 1,2,3,4)
3205 defm LD1WB : LDWB_VList_BHSD<0b0111, "VOne", uimm_exact8, uimm_exact16, "ld1">;
3206 defm LD1WB_1D : NeonI_LDWB_VList<0, 0b0111, 0b11, VOne1D_operand, uimm_exact8,
3209 defm LD2WB : LDWB_VList_BHSD<0b1000, "VPair", uimm_exact16, uimm_exact32, "ld2">;
3211 defm LD3WB : LDWB_VList_BHSD<0b0100, "VTriple", uimm_exact24, uimm_exact48,
3214 defm LD4WB : LDWB_VList_BHSD<0b0000, "VQuad", uimm_exact32, uimm_exact64, "ld4">;
3216 // Post-index load multiple 1-element structures from N consecutive registers
3218 defm LD1WB2V : LDWB_VList_BHSD<0b1010, "VPair", uimm_exact16, uimm_exact32,
3220 defm LD1WB2V_1D : NeonI_LDWB_VList<0, 0b1010, 0b11, VPair1D_operand,
3221 uimm_exact16, "ld1">;
3223 defm LD1WB3V : LDWB_VList_BHSD<0b0110, "VTriple", uimm_exact24, uimm_exact48,
3225 defm LD1WB3V_1D : NeonI_LDWB_VList<0, 0b0110, 0b11, VTriple1D_operand,
3226 uimm_exact24, "ld1">;
3228 defm LD1WB_4V : LDWB_VList_BHSD<0b0010, "VQuad", uimm_exact32, uimm_exact64,
3230 defm LD1WB4V_1D : NeonI_LDWB_VList<0, 0b0010, 0b11, VQuad1D_operand,
3231 uimm_exact32, "ld1">;
3233 multiclass NeonI_STWB_VList<bit q, bits<4> opcode, bits<2> size,
3234 RegisterOperand VecList, Operand ImmTy,
3236 let Constraints = "$Rn = $wb", mayStore = 1, neverHasSideEffects = 1,
3237 DecoderMethod = "DecodeVLDSTPostInstruction" in {
3238 def _fixed : NeonI_LdStMult_Post<q, 0, opcode, size,
3239 (outs GPR64xsp:$wb),
3240 (ins GPR64xsp:$Rn, ImmTy:$amt, VecList:$Rt),
3241 asmop # "\t$Rt, [$Rn], $amt",
3247 def _register : NeonI_LdStMult_Post<q, 0, opcode, size,
3248 (outs GPR64xsp:$wb),
3249 (ins GPR64xsp:$Rn, GPR64noxzr:$Rm, VecList:$Rt),
3250 asmop # "\t$Rt, [$Rn], $Rm",
3256 multiclass STWB_VList_BHSD<bits<4> opcode, string List, Operand ImmTy,
3257 Operand ImmTy2, string asmop> {
3258 defm _8B : NeonI_STWB_VList<0, opcode, 0b00,
3259 !cast<RegisterOperand>(List # "8B_operand"), ImmTy, asmop>;
3261 defm _4H : NeonI_STWB_VList<0, opcode, 0b01,
3262 !cast<RegisterOperand>(List # "4H_operand"),
3265 defm _2S : NeonI_STWB_VList<0, opcode, 0b10,
3266 !cast<RegisterOperand>(List # "2S_operand"),
3269 defm _16B : NeonI_STWB_VList<1, opcode, 0b00,
3270 !cast<RegisterOperand>(List # "16B_operand"),
3273 defm _8H : NeonI_STWB_VList<1, opcode, 0b01,
3274 !cast<RegisterOperand>(List # "8H_operand"),
3277 defm _4S : NeonI_STWB_VList<1, opcode, 0b10,
3278 !cast<RegisterOperand>(List # "4S_operand"),
3281 defm _2D : NeonI_STWB_VList<1, opcode, 0b11,
3282 !cast<RegisterOperand>(List # "2D_operand"),
3286 // Post-index load multiple N-element structures from N registers (N = 1,2,3,4)
3287 defm ST1WB : STWB_VList_BHSD<0b0111, "VOne", uimm_exact8, uimm_exact16, "st1">;
3288 defm ST1WB_1D : NeonI_STWB_VList<0, 0b0111, 0b11, VOne1D_operand, uimm_exact8,
3291 defm ST2WB : STWB_VList_BHSD<0b1000, "VPair", uimm_exact16, uimm_exact32, "st2">;
3293 defm ST3WB : STWB_VList_BHSD<0b0100, "VTriple", uimm_exact24, uimm_exact48,
3296 defm ST4WB : STWB_VList_BHSD<0b0000, "VQuad", uimm_exact32, uimm_exact64, "st4">;
3298 // Post-index load multiple 1-element structures from N consecutive registers
3300 defm ST1WB2V : STWB_VList_BHSD<0b1010, "VPair", uimm_exact16, uimm_exact32,
3302 defm ST1WB2V_1D : NeonI_STWB_VList<0, 0b1010, 0b11, VPair1D_operand,
3303 uimm_exact16, "st1">;
3305 defm ST1WB3V : STWB_VList_BHSD<0b0110, "VTriple", uimm_exact24, uimm_exact48,
3307 defm ST1WB3V_1D : NeonI_STWB_VList<0, 0b0110, 0b11, VTriple1D_operand,
3308 uimm_exact24, "st1">;
3310 defm ST1WB4V : STWB_VList_BHSD<0b0010, "VQuad", uimm_exact32, uimm_exact64,
3312 defm ST1WB4V_1D : NeonI_STWB_VList<0, 0b0010, 0b11, VQuad1D_operand,
3313 uimm_exact32, "st1">;
3315 // End of post-index vector load/store multiple N-element structure
3316 // (class SIMD lselem-post)
3318 // Scalar Three Same
3320 class NeonI_Scalar3Same_size<bit u, bits<2> size, bits<5> opcode, string asmop,
3322 : NeonI_Scalar3Same<u, size, opcode,
3323 (outs FPRC:$Rd), (ins FPRC:$Rn, FPRC:$Rm),
3324 !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
3328 class NeonI_Scalar3Same_D_size<bit u, bits<5> opcode, string asmop>
3329 : NeonI_Scalar3Same_size<u, 0b11, opcode, asmop, FPR64>;
3331 multiclass NeonI_Scalar3Same_HS_sizes<bit u, bits<5> opcode, string asmop,
3332 bit Commutable = 0> {
3333 let isCommutable = Commutable in {
3334 def hhh : NeonI_Scalar3Same_size<u, 0b01, opcode, asmop, FPR16>;
3335 def sss : NeonI_Scalar3Same_size<u, 0b10, opcode, asmop, FPR32>;
3339 multiclass NeonI_Scalar3Same_SD_sizes<bit u, bit size_high, bits<5> opcode,
3340 string asmop, bit Commutable = 0> {
3341 let isCommutable = Commutable in {
3342 def sss : NeonI_Scalar3Same_size<u, {size_high, 0b0}, opcode, asmop, FPR32>;
3343 def ddd : NeonI_Scalar3Same_size<u, {size_high, 0b1}, opcode, asmop, FPR64>;
3347 multiclass NeonI_Scalar3Same_BHSD_sizes<bit u, bits<5> opcode,
3348 string asmop, bit Commutable = 0> {
3349 let isCommutable = Commutable in {
3350 def bbb : NeonI_Scalar3Same_size<u, 0b00, opcode, asmop, FPR8>;
3351 def hhh : NeonI_Scalar3Same_size<u, 0b01, opcode, asmop, FPR16>;
3352 def sss : NeonI_Scalar3Same_size<u, 0b10, opcode, asmop, FPR32>;
3353 def ddd : NeonI_Scalar3Same_size<u, 0b11, opcode, asmop, FPR64>;
3357 multiclass Neon_Scalar3Same_D_size_patterns<SDPatternOperator opnode,
3358 Instruction INSTD> {
3359 def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
3360 (INSTD FPR64:$Rn, FPR64:$Rm)>;
3363 multiclass Neon_Scalar3Same_BHSD_size_patterns<SDPatternOperator opnode,
3368 : Neon_Scalar3Same_D_size_patterns<opnode, INSTD> {
3369 def: Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))),
3370 (INSTB FPR8:$Rn, FPR8:$Rm)>;
3372 def: Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
3373 (INSTH FPR16:$Rn, FPR16:$Rm)>;
3375 def: Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
3376 (INSTS FPR32:$Rn, FPR32:$Rm)>;
3379 class Neon_Scalar3Same_cmp_D_size_patterns<SDPatternOperator opnode,
3381 : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
3382 (INSTD FPR64:$Rn, FPR64:$Rm)>;
3384 multiclass Neon_Scalar3Same_HS_size_patterns<SDPatternOperator opnode,
3386 Instruction INSTS> {
3387 def : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
3388 (INSTH FPR16:$Rn, FPR16:$Rm)>;
3389 def : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
3390 (INSTS FPR32:$Rn, FPR32:$Rm)>;
3393 multiclass Neon_Scalar3Same_SD_size_patterns<SDPatternOperator opnode,
3395 Instruction INSTD> {
3396 def : Pat<(v1f32 (opnode (v1f32 FPR32:$Rn), (v1f32 FPR32:$Rm))),
3397 (INSTS FPR32:$Rn, FPR32:$Rm)>;
3398 def : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
3399 (INSTD FPR64:$Rn, FPR64:$Rm)>;
3402 multiclass Neon_Scalar3Same_cmp_SD_size_patterns<SDPatternOperator opnode,
3404 Instruction INSTD> {
3405 def : Pat<(v1i32 (opnode (v1f32 FPR32:$Rn), (v1f32 FPR32:$Rm))),
3406 (INSTS FPR32:$Rn, FPR32:$Rm)>;
3407 def : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
3408 (INSTD FPR64:$Rn, FPR64:$Rm)>;
3411 // Scalar Three Different
3413 class NeonI_Scalar3Diff_size<bit u, bits<2> size, bits<4> opcode, string asmop,
3414 RegisterClass FPRCD, RegisterClass FPRCS>
3415 : NeonI_Scalar3Diff<u, size, opcode,
3416 (outs FPRCD:$Rd), (ins FPRCS:$Rn, FPRCS:$Rm),
3417 !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
3421 multiclass NeonI_Scalar3Diff_HS_size<bit u, bits<4> opcode, string asmop> {
3422 def shh : NeonI_Scalar3Diff_size<u, 0b01, opcode, asmop, FPR32, FPR16>;
3423 def dss : NeonI_Scalar3Diff_size<u, 0b10, opcode, asmop, FPR64, FPR32>;
3426 multiclass NeonI_Scalar3Diff_ml_HS_size<bit u, bits<4> opcode, string asmop> {
3427 let Constraints = "$Src = $Rd" in {
3428 def shh : NeonI_Scalar3Diff<u, 0b01, opcode,
3429 (outs FPR32:$Rd), (ins FPR32:$Src, FPR16:$Rn, FPR16:$Rm),
3430 !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
3433 def dss : NeonI_Scalar3Diff<u, 0b10, opcode,
3434 (outs FPR64:$Rd), (ins FPR64:$Src, FPR32:$Rn, FPR32:$Rm),
3435 !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
3441 multiclass Neon_Scalar3Diff_HS_size_patterns<SDPatternOperator opnode,
3443 Instruction INSTS> {
3444 def : Pat<(v1i32 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
3445 (INSTH FPR16:$Rn, FPR16:$Rm)>;
3446 def : Pat<(v1i64 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
3447 (INSTS FPR32:$Rn, FPR32:$Rm)>;
3450 multiclass Neon_Scalar3Diff_ml_HS_size_patterns<SDPatternOperator opnode,
3452 Instruction INSTS> {
3453 def : Pat<(v1i32 (opnode (v1i32 FPR32:$Src), (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
3454 (INSTH FPR32:$Src, FPR16:$Rn, FPR16:$Rm)>;
3455 def : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
3456 (INSTS FPR64:$Src, FPR32:$Rn, FPR32:$Rm)>;
3459 // Scalar Two Registers Miscellaneous
3461 class NeonI_Scalar2SameMisc_size<bit u, bits<2> size, bits<5> opcode, string asmop,
3462 RegisterClass FPRCD, RegisterClass FPRCS>
3463 : NeonI_Scalar2SameMisc<u, size, opcode,
3464 (outs FPRCD:$Rd), (ins FPRCS:$Rn),
3465 !strconcat(asmop, "\t$Rd, $Rn"),
3469 multiclass NeonI_Scalar2SameMisc_SD_size<bit u, bit size_high, bits<5> opcode,
3471 def ss : NeonI_Scalar2SameMisc_size<u, {size_high, 0b0}, opcode, asmop, FPR32,
3473 def dd : NeonI_Scalar2SameMisc_size<u, {size_high, 0b1}, opcode, asmop, FPR64,
3477 multiclass NeonI_Scalar2SameMisc_D_size<bit u, bits<5> opcode, string asmop> {
3478 def dd: NeonI_Scalar2SameMisc_size<u, 0b11, opcode, asmop, FPR64, FPR64>;
3481 multiclass NeonI_Scalar2SameMisc_BHSD_size<bit u, bits<5> opcode, string asmop>
3482 : NeonI_Scalar2SameMisc_D_size<u, opcode, asmop> {
3483 def bb : NeonI_Scalar2SameMisc_size<u, 0b00, opcode, asmop, FPR8, FPR8>;
3484 def hh : NeonI_Scalar2SameMisc_size<u, 0b01, opcode, asmop, FPR16, FPR16>;
3485 def ss : NeonI_Scalar2SameMisc_size<u, 0b10, opcode, asmop, FPR32, FPR32>;
3488 multiclass NeonI_Scalar2SameMisc_narrow_HSD_size<bit u, bits<5> opcode,
3490 def bh : NeonI_Scalar2SameMisc_size<u, 0b00, opcode, asmop, FPR8, FPR16>;
3491 def hs : NeonI_Scalar2SameMisc_size<u, 0b01, opcode, asmop, FPR16, FPR32>;
3492 def sd : NeonI_Scalar2SameMisc_size<u, 0b10, opcode, asmop, FPR32, FPR64>;
3495 class NeonI_Scalar2SameMisc_accum_size<bit u, bits<2> size, bits<5> opcode,
3496 string asmop, RegisterClass FPRC>
3497 : NeonI_Scalar2SameMisc<u, size, opcode,
3498 (outs FPRC:$Rd), (ins FPRC:$Src, FPRC:$Rn),
3499 !strconcat(asmop, "\t$Rd, $Rn"),
3503 multiclass NeonI_Scalar2SameMisc_accum_BHSD_size<bit u, bits<5> opcode,
3506 let Constraints = "$Src = $Rd" in {
3507 def bb : NeonI_Scalar2SameMisc_accum_size<u, 0b00, opcode, asmop, FPR8>;
3508 def hh : NeonI_Scalar2SameMisc_accum_size<u, 0b01, opcode, asmop, FPR16>;
3509 def ss : NeonI_Scalar2SameMisc_accum_size<u, 0b10, opcode, asmop, FPR32>;
3510 def dd : NeonI_Scalar2SameMisc_accum_size<u, 0b11, opcode, asmop, FPR64>;
3514 multiclass Neon_Scalar2SameMisc_cvt_SD_size_patterns<SDPatternOperator Sopnode,
3515 SDPatternOperator Dopnode,
3517 Instruction INSTD> {
3518 def : Pat<(v1f32 (Sopnode (v1i32 FPR32:$Rn))),
3520 def : Pat<(v1f64 (Dopnode (v1i64 FPR64:$Rn))),
3524 multiclass Neon_Scalar2SameMisc_SD_size_patterns<SDPatternOperator opnode,
3526 Instruction INSTD> {
3527 def : Pat<(v1f32 (opnode (v1f32 FPR32:$Rn))),
3529 def : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))),
3533 class NeonI_Scalar2SameMisc_cmpz_D_size<bit u, bits<5> opcode, string asmop>
3534 : NeonI_Scalar2SameMisc<u, 0b11, opcode,
3535 (outs FPR64:$Rd), (ins FPR64:$Rn, neon_uimm0:$Imm),
3536 !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
3540 multiclass NeonI_Scalar2SameMisc_cmpz_SD_size<bit u, bits<5> opcode,
3542 def ssi : NeonI_Scalar2SameMisc<u, 0b10, opcode,
3543 (outs FPR32:$Rd), (ins FPR32:$Rn, fpz32:$FPImm),
3544 !strconcat(asmop, "\t$Rd, $Rn, $FPImm"),
3547 def ddi : NeonI_Scalar2SameMisc<u, 0b11, opcode,
3548 (outs FPR64:$Rd), (ins FPR64:$Rn, fpz64movi:$FPImm),
3549 !strconcat(asmop, "\t$Rd, $Rn, $FPImm"),
3554 class Neon_Scalar2SameMisc_cmpz_D_size_patterns<SDPatternOperator opnode,
3556 : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn),
3557 (v1i64 (bitconvert (v8i8 Neon_immAllZeros))))),
3558 (INSTD FPR64:$Rn, 0)>;
3560 multiclass Neon_Scalar2SameMisc_cmpz_SD_size_patterns<SDPatternOperator opnode,
3562 Instruction INSTD> {
3563 def : Pat<(v1i32 (opnode (v1f32 FPR32:$Rn),
3564 (v1f32 (scalar_to_vector (f32 fpimm:$FPImm))))),
3565 (INSTS FPR32:$Rn, fpimm:$FPImm)>;
3566 def : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn),
3567 (v1f64 (bitconvert (v8i8 Neon_immAllZeros))))),
3568 (INSTD FPR64:$Rn, 0)>;
3571 multiclass Neon_Scalar2SameMisc_D_size_patterns<SDPatternOperator opnode,
3572 Instruction INSTD> {
3573 def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn))),
3577 multiclass Neon_Scalar2SameMisc_BHSD_size_patterns<SDPatternOperator opnode,
3582 : Neon_Scalar2SameMisc_D_size_patterns<opnode, INSTD> {
3583 def : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn))),
3585 def : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn))),
3587 def : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn))),
3591 multiclass Neon_Scalar2SameMisc_narrow_HSD_size_patterns<
3592 SDPatternOperator opnode,
3595 Instruction INSTD> {
3596 def : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn))),
3598 def : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn))),
3600 def : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn))),
3605 multiclass Neon_Scalar2SameMisc_accum_BHSD_size_patterns<
3606 SDPatternOperator opnode,
3610 Instruction INSTD> {
3611 def : Pat<(v1i8 (opnode (v1i8 FPR8:$Src), (v1i8 FPR8:$Rn))),
3612 (INSTB FPR8:$Src, FPR8:$Rn)>;
3613 def : Pat<(v1i16 (opnode (v1i16 FPR16:$Src), (v1i16 FPR16:$Rn))),
3614 (INSTH FPR16:$Src, FPR16:$Rn)>;
3615 def : Pat<(v1i32 (opnode (v1i32 FPR32:$Src), (v1i32 FPR32:$Rn))),
3616 (INSTS FPR32:$Src, FPR32:$Rn)>;
3617 def : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn))),
3618 (INSTD FPR64:$Src, FPR64:$Rn)>;
3621 // Scalar Shift By Immediate
3623 class NeonI_ScalarShiftImm_size<bit u, bits<5> opcode, string asmop,
3624 RegisterClass FPRC, Operand ImmTy>
3625 : NeonI_ScalarShiftImm<u, opcode,
3626 (outs FPRC:$Rd), (ins FPRC:$Rn, ImmTy:$Imm),
3627 !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
3630 multiclass NeonI_ScalarShiftRightImm_D_size<bit u, bits<5> opcode,
3632 def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shr_imm64> {
3634 let Inst{22} = 0b1; // immh:immb = 1xxxxxx
3635 let Inst{21-16} = Imm;
3639 multiclass NeonI_ScalarShiftRightImm_BHSD_size<bit u, bits<5> opcode,
3641 : NeonI_ScalarShiftRightImm_D_size<u, opcode, asmop> {
3642 def bbi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR8, shr_imm8> {
3644 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
3645 let Inst{18-16} = Imm;
3647 def hhi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR16, shr_imm16> {
3649 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
3650 let Inst{19-16} = Imm;
3652 def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shr_imm32> {
3654 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
3655 let Inst{20-16} = Imm;
3659 multiclass NeonI_ScalarShiftLeftImm_D_size<bit u, bits<5> opcode,
3661 def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shl_imm64> {
3663 let Inst{22} = 0b1; // immh:immb = 1xxxxxx
3664 let Inst{21-16} = Imm;
3668 multiclass NeonI_ScalarShiftLeftImm_BHSD_size<bit u, bits<5> opcode,
3670 : NeonI_ScalarShiftLeftImm_D_size<u, opcode, asmop> {
3671 def bbi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR8, shl_imm8> {
3673 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
3674 let Inst{18-16} = Imm;
3676 def hhi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR16, shl_imm16> {
3678 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
3679 let Inst{19-16} = Imm;
3681 def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shl_imm32> {
3683 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
3684 let Inst{20-16} = Imm;
3688 class NeonI_ScalarShiftImm_accum_D_size<bit u, bits<5> opcode, string asmop>
3689 : NeonI_ScalarShiftImm<u, opcode,
3690 (outs FPR64:$Rd), (ins FPR64:$Src, FPR64:$Rn, shr_imm64:$Imm),
3691 !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
3694 let Inst{22} = 0b1; // immh:immb = 1xxxxxx
3695 let Inst{21-16} = Imm;
3696 let Constraints = "$Src = $Rd";
3699 class NeonI_ScalarShiftImm_narrow_size<bit u, bits<5> opcode, string asmop,
3700 RegisterClass FPRCD, RegisterClass FPRCS,
3702 : NeonI_ScalarShiftImm<u, opcode,
3703 (outs FPRCD:$Rd), (ins FPRCS:$Rn, ImmTy:$Imm),
3704 !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
3707 multiclass NeonI_ScalarShiftImm_narrow_HSD_size<bit u, bits<5> opcode,
3709 def bhi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR8, FPR16,
3712 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
3713 let Inst{18-16} = Imm;
3715 def hsi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR16, FPR32,
3718 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
3719 let Inst{19-16} = Imm;
3721 def sdi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR32, FPR64,
3724 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
3725 let Inst{20-16} = Imm;
3729 multiclass NeonI_ScalarShiftImm_scvtf_SD_size<bit u, bits<5> opcode, string asmop> {
3730 def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shr_imm32> {
3732 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
3733 let Inst{20-16} = Imm;
3735 def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shr_imm64> {
3737 let Inst{22} = 0b1; // immh:immb = 1xxxxxx
3738 let Inst{21-16} = Imm;
3742 multiclass Neon_ScalarShiftImm_D_size_patterns<SDPatternOperator opnode,
3743 Instruction INSTD> {
3744 def ddi : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (i32 imm:$Imm))),
3745 (INSTD FPR64:$Rn, imm:$Imm)>;
3748 multiclass Neon_ScalarShiftImm_BHSD_size_patterns<SDPatternOperator opnode,
3753 : Neon_ScalarShiftImm_D_size_patterns<opnode, INSTD> {
3754 def bbi : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (i32 imm:$Imm))),
3755 (INSTB FPR8:$Rn, imm:$Imm)>;
3756 def hhi : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (i32 imm:$Imm))),
3757 (INSTH FPR16:$Rn, imm:$Imm)>;
3758 def ssi : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (i32 imm:$Imm))),
3759 (INSTS FPR32:$Rn, imm:$Imm)>;
3762 class Neon_ScalarShiftImm_accum_D_size_patterns<SDPatternOperator opnode,
3764 : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn), (i32 imm:$Imm))),
3765 (INSTD FPR64:$Src, FPR64:$Rn, imm:$Imm)>;
3767 multiclass Neon_ScalarShiftImm_narrow_HSD_size_patterns<
3768 SDPatternOperator opnode,
3771 Instruction INSTD> {
3772 def bhi : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn), (i32 imm:$Imm))),
3773 (INSTH FPR16:$Rn, imm:$Imm)>;
3774 def hsi : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn), (i32 imm:$Imm))),
3775 (INSTS FPR32:$Rn, imm:$Imm)>;
3776 def sdi : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn), (i32 imm:$Imm))),
3777 (INSTD FPR64:$Rn, imm:$Imm)>;
3780 multiclass Neon_ScalarShiftImm_scvtf_SD_size_patterns<SDPatternOperator Sopnode,
3781 SDPatternOperator Dopnode,
3783 Instruction INSTD> {
3784 def ssi : Pat<(v1f32 (Sopnode (v1i32 FPR32:$Rn), (i32 imm:$Imm))),
3785 (INSTS FPR32:$Rn, imm:$Imm)>;
3786 def ddi : Pat<(v1f64 (Dopnode (v1i64 FPR64:$Rn), (i32 imm:$Imm))),
3787 (INSTD FPR64:$Rn, imm:$Imm)>;
3790 // Scalar Signed Shift Right (Immediate)
3791 defm SSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00000, "sshr">;
3792 defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vshrds_n, SSHRddi>;
3794 // Scalar Unsigned Shift Right (Immediate)
3795 defm USHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00000, "ushr">;
3796 defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vshrdu_n, USHRddi>;
3798 // Scalar Signed Rounding Shift Right (Immediate)
3799 defm SRSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00100, "srshr">;
3800 defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vrshrds_n, SRSHRddi>;
3802 // Scalar Unsigned Rounding Shift Right (Immediate)
3803 defm URSHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00100, "urshr">;
3804 defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vrshrdu_n, URSHRddi>;
3806 // Scalar Signed Shift Right and Accumulate (Immediate)
3807 def SSRA : NeonI_ScalarShiftImm_accum_D_size<0b0, 0b00010, "ssra">;
3808 def : Neon_ScalarShiftImm_accum_D_size_patterns<int_aarch64_neon_vsrads_n, SSRA>;
3810 // Scalar Unsigned Shift Right and Accumulate (Immediate)
3811 def USRA : NeonI_ScalarShiftImm_accum_D_size<0b1, 0b00010, "usra">;
3812 def : Neon_ScalarShiftImm_accum_D_size_patterns<int_aarch64_neon_vsradu_n, USRA>;
3814 // Scalar Signed Rounding Shift Right and Accumulate (Immediate)
3815 def SRSRA : NeonI_ScalarShiftImm_accum_D_size<0b0, 0b00110, "srsra">;
3816 def : Neon_ScalarShiftImm_accum_D_size_patterns<int_aarch64_neon_vrsrads_n, SRSRA>;
3818 // Scalar Unsigned Rounding Shift Right and Accumulate (Immediate)
3819 def URSRA : NeonI_ScalarShiftImm_accum_D_size<0b1, 0b00110, "ursra">;
3820 def : Neon_ScalarShiftImm_accum_D_size_patterns<int_aarch64_neon_vrsradu_n, URSRA>;
3822 // Scalar Shift Left (Immediate)
3823 defm SHL : NeonI_ScalarShiftLeftImm_D_size<0b0, 0b01010, "shl">;
3824 defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vshld_n, SHLddi>;
3826 // Signed Saturating Shift Left (Immediate)
3827 defm SQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b0, 0b01110, "sqshl">;
3828 defm : Neon_ScalarShiftImm_BHSD_size_patterns<int_aarch64_neon_vqshls_n,
3830 SQSHLssi, SQSHLddi>;
3832 // Unsigned Saturating Shift Left (Immediate)
3833 defm UQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01110, "uqshl">;
3834 defm : Neon_ScalarShiftImm_BHSD_size_patterns<int_aarch64_neon_vqshlu_n,
3836 UQSHLssi, UQSHLddi>;
3838 // Signed Saturating Shift Left Unsigned (Immediate)
3839 defm SQSHLU : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01100, "sqshlu">;
3840 defm : Neon_ScalarShiftImm_BHSD_size_patterns<int_aarch64_neon_vqshlus_n,
3841 SQSHLUbbi, SQSHLUhhi,
3842 SQSHLUssi, SQSHLUddi>;
3844 // Shift Right And Insert (Immediate)
3845 defm SRI : NeonI_ScalarShiftRightImm_D_size<0b1, 0b01000, "sri">;
3846 defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vsrid_n, SRIddi>;
3848 // Shift Left And Insert (Immediate)
3849 defm SLI : NeonI_ScalarShiftLeftImm_D_size<0b1, 0b01010, "sli">;
3850 defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vslid_n, SLIddi>;
3852 // Signed Saturating Shift Right Narrow (Immediate)
3853 defm SQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10010, "sqshrn">;
3854 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqshrn,
3855 SQSHRNbhi, SQSHRNhsi,
3858 // Unsigned Saturating Shift Right Narrow (Immediate)
3859 defm UQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10010, "uqshrn">;
3860 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vuqshrn,
3861 UQSHRNbhi, UQSHRNhsi,
3864 // Signed Saturating Rounded Shift Right Narrow (Immediate)
3865 defm SQRSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10011, "sqrshrn">;
3866 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqrshrn,
3867 SQRSHRNbhi, SQRSHRNhsi,
3870 // Unsigned Saturating Rounded Shift Right Narrow (Immediate)
3871 defm UQRSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10011, "uqrshrn">;
3872 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vuqrshrn,
3873 UQRSHRNbhi, UQRSHRNhsi,
3876 // Signed Saturating Shift Right Unsigned Narrow (Immediate)
3877 defm SQSHRUN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10000, "sqshrun">;
3878 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqshrun,
3879 SQSHRUNbhi, SQSHRUNhsi,
3882 // Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate)
3883 defm SQRSHRUN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10001, "sqrshrun">;
3884 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqrshrun,
3885 SQRSHRUNbhi, SQRSHRUNhsi,
3888 // Scalar Signed Fixed-point Convert To Floating-Point (Immediate)
3889 defm SCVTF_N : NeonI_ScalarShiftImm_scvtf_SD_size<0b0, 0b11100, "scvtf">;
3890 defm : Neon_ScalarShiftImm_scvtf_SD_size_patterns<int_aarch64_neon_vcvtf32_n_s32,
3891 int_aarch64_neon_vcvtf64_n_s64,
3892 SCVTF_Nssi, SCVTF_Nddi>;
3894 // Scalar Unsigned Fixed-point Convert To Floating-Point (Immediate)
3895 defm UCVTF_N : NeonI_ScalarShiftImm_scvtf_SD_size<0b1, 0b11100, "ucvtf">;
3896 defm : Neon_ScalarShiftImm_scvtf_SD_size_patterns<int_aarch64_neon_vcvtf32_n_u32,
3897 int_aarch64_neon_vcvtf64_n_u64,
3898 UCVTF_Nssi, UCVTF_Nddi>;
3900 // Scalar Integer Add
3901 let isCommutable = 1 in {
3902 def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">;
3905 // Scalar Integer Sub
3906 def SUBddd : NeonI_Scalar3Same_D_size<0b1, 0b10000, "sub">;
3908 // Pattern for Scalar Integer Add and Sub with D register only
3909 defm : Neon_Scalar3Same_D_size_patterns<add, ADDddd>;
3910 defm : Neon_Scalar3Same_D_size_patterns<sub, SUBddd>;
3912 // Patterns to match llvm.aarch64.* intrinsic for Scalar Add, Sub
3913 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vaddds, ADDddd>;
3914 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vadddu, ADDddd>;
3915 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vsubds, SUBddd>;
3916 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vsubdu, SUBddd>;
3918 // Scalar Integer Saturating Add (Signed, Unsigned)
3919 defm SQADD : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00001, "sqadd", 1>;
3920 defm UQADD : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00001, "uqadd", 1>;
3922 // Scalar Integer Saturating Sub (Signed, Unsigned)
3923 defm SQSUB : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00101, "sqsub", 0>;
3924 defm UQSUB : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00101, "uqsub", 0>;
3926 // Patterns to match llvm.arm.* intrinsic for
3927 // Scalar Integer Saturating Add, Sub (Signed, Unsigned)
3928 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqadds, SQADDddd>;
3929 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqaddu, UQADDddd>;
3930 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqsubs, SQSUBddd>;
3931 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqsubu, UQSUBddd>;
3933 // Patterns to match llvm.aarch64.* intrinsic for
3934 // Scalar Integer Saturating Add, Sub (Signed, Unsigned)
3935 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqadds, SQADDbbb,
3936 SQADDhhh, SQADDsss, SQADDddd>;
3937 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqaddu, UQADDbbb,
3938 UQADDhhh, UQADDsss, UQADDddd>;
3939 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqsubs, SQSUBbbb,
3940 SQSUBhhh, SQSUBsss, SQSUBddd>;
3941 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqsubu, UQSUBbbb,
3942 UQSUBhhh, UQSUBsss, UQSUBddd>;
3944 // Scalar Integer Saturating Doubling Multiply Half High
3945 defm SQDMULH : NeonI_Scalar3Same_HS_sizes<0b0, 0b10110, "sqdmulh", 1>;
3947 // Scalar Integer Saturating Rounding Doubling Multiply Half High
3948 defm SQRDMULH : NeonI_Scalar3Same_HS_sizes<0b1, 0b10110, "sqrdmulh", 1>;
3950 // Patterns to match llvm.arm.* intrinsic for
3951 // Scalar Integer Saturating Doubling Multiply Half High and
3952 // Scalar Integer Saturating Rounding Doubling Multiply Half High
3953 defm : Neon_Scalar3Same_HS_size_patterns<int_arm_neon_vqdmulh, SQDMULHhhh,
3955 defm : Neon_Scalar3Same_HS_size_patterns<int_arm_neon_vqrdmulh, SQRDMULHhhh,
3958 // Scalar Floating-point Multiply Extended
3959 defm FMULX : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11011, "fmulx", 1>;
3961 // Scalar Floating-point Reciprocal Step
3962 defm FRECPS : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11111, "frecps", 0>;
3964 // Scalar Floating-point Reciprocal Square Root Step
3965 defm FRSQRTS : NeonI_Scalar3Same_SD_sizes<0b0, 0b1, 0b11111, "frsqrts", 0>;
3967 // Patterns to match llvm.arm.* intrinsic for
3968 // Scalar Floating-point Reciprocal Step and
3969 // Scalar Floating-point Reciprocal Square Root Step
3970 defm : Neon_Scalar3Same_SD_size_patterns<int_arm_neon_vrecps, FRECPSsss,
3972 defm : Neon_Scalar3Same_SD_size_patterns<int_arm_neon_vrsqrts, FRSQRTSsss,
3975 // Patterns to match llvm.aarch64.* intrinsic for
3976 // Scalar Floating-point Multiply Extended,
3977 defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_vmulx, FMULXsss,
3980 // Scalar Integer Shift Left (Signed, Unsigned)
3981 def SSHLddd : NeonI_Scalar3Same_D_size<0b0, 0b01000, "sshl">;
3982 def USHLddd : NeonI_Scalar3Same_D_size<0b1, 0b01000, "ushl">;
3984 // Patterns to match llvm.arm.* intrinsic for
3985 // Scalar Integer Shift Left (Signed, Unsigned)
3986 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vshifts, SSHLddd>;
3987 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vshiftu, USHLddd>;
3989 // Patterns to match llvm.aarch64.* intrinsic for
3990 // Scalar Integer Shift Left (Signed, Unsigned)
3991 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vshlds, SSHLddd>;
3992 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vshldu, USHLddd>;
3994 // Scalar Integer Saturating Shift Left (Signed, Unsigned)
3995 defm SQSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01001, "sqshl", 0>;
3996 defm UQSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01001, "uqshl", 0>;
3998 // Patterns to match llvm.aarch64.* intrinsic for
3999 // Scalar Integer Saturating Shift Left (Signed, Unsigned)
4000 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqshls, SQSHLbbb,
4001 SQSHLhhh, SQSHLsss, SQSHLddd>;
4002 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqshlu, UQSHLbbb,
4003 UQSHLhhh, UQSHLsss, UQSHLddd>;
4005 // Patterns to match llvm.arm.* intrinsic for
4006 // Scalar Integer Saturating Shift Left (Signed, Unsigned)
4007 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqshifts, SQSHLddd>;
4008 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqshiftu, UQSHLddd>;
4010 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
4011 def SRSHLddd: NeonI_Scalar3Same_D_size<0b0, 0b01010, "srshl">;
4012 def URSHLddd: NeonI_Scalar3Same_D_size<0b1, 0b01010, "urshl">;
4014 // Patterns to match llvm.aarch64.* intrinsic for
4015 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
4016 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vrshlds, SRSHLddd>;
4017 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vrshldu, URSHLddd>;
4019 // Patterns to match llvm.arm.* intrinsic for
4020 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
4021 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vrshifts, SRSHLddd>;
4022 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vrshiftu, URSHLddd>;
4024 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
4025 defm SQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01011, "sqrshl", 0>;
4026 defm UQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01011, "uqrshl", 0>;
4028 // Patterns to match llvm.aarch64.* intrinsic for
4029 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
4030 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqrshls, SQRSHLbbb,
4031 SQRSHLhhh, SQRSHLsss, SQRSHLddd>;
4032 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqrshlu, UQRSHLbbb,
4033 UQRSHLhhh, UQRSHLsss, UQRSHLddd>;
4035 // Patterns to match llvm.arm.* intrinsic for
4036 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
4037 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshifts, SQRSHLddd>;
4038 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshiftu, UQRSHLddd>;
4040 // Signed Saturating Doubling Multiply-Add Long
4041 defm SQDMLAL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1001, "sqdmlal">;
4042 defm : Neon_Scalar3Diff_ml_HS_size_patterns<int_aarch64_neon_vqdmlal,
4043 SQDMLALshh, SQDMLALdss>;
4045 // Signed Saturating Doubling Multiply-Subtract Long
4046 defm SQDMLSL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1011, "sqdmlsl">;
4047 defm : Neon_Scalar3Diff_ml_HS_size_patterns<int_aarch64_neon_vqdmlsl,
4048 SQDMLSLshh, SQDMLSLdss>;
4050 // Signed Saturating Doubling Multiply Long
4051 defm SQDMULL : NeonI_Scalar3Diff_HS_size<0b0, 0b1101, "sqdmull">;
4052 defm : Neon_Scalar3Diff_HS_size_patterns<int_aarch64_neon_vqdmull,
4053 SQDMULLshh, SQDMULLdss>;
4055 // Scalar Signed Integer Convert To Floating-point
4056 defm SCVTF : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11101, "scvtf">;
4057 defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns<int_aarch64_neon_vcvtf32_s32,
4058 int_aarch64_neon_vcvtf64_s64,
4061 // Scalar Unsigned Integer Convert To Floating-point
4062 defm UCVTF : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11101, "ucvtf">;
4063 defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns<int_aarch64_neon_vcvtf32_u32,
4064 int_aarch64_neon_vcvtf64_u64,
4067 // Scalar Floating-point Reciprocal Estimate
4068 defm FRECPE : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11101, "frecpe">;
4069 defm : Neon_Scalar2SameMisc_SD_size_patterns<int_arm_neon_vrecpe,
4070 FRECPEss, FRECPEdd>;
4072 // Scalar Floating-point Reciprocal Exponent
4073 defm FRECPX : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11111, "frecpx">;
4074 defm : Neon_Scalar2SameMisc_SD_size_patterns<int_aarch64_neon_vrecpx,
4075 FRECPXss, FRECPXdd>;
4077 // Scalar Floating-point Reciprocal Square Root Estimate
4078 defm FRSQRTE: NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11101, "frsqrte">;
4079 defm : Neon_Scalar2SameMisc_SD_size_patterns<int_arm_neon_vrsqrte,
4080 FRSQRTEss, FRSQRTEdd>;
4082 // Scalar Integer Compare
4084 // Scalar Compare Bitwise Equal
4085 def CMEQddd: NeonI_Scalar3Same_D_size<0b1, 0b10001, "cmeq">;
4086 def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vceq, CMEQddd>;
4088 // Scalar Compare Signed Greater Than Or Equal
4089 def CMGEddd: NeonI_Scalar3Same_D_size<0b0, 0b00111, "cmge">;
4090 def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vcge, CMGEddd>;
4092 // Scalar Compare Unsigned Higher Or Same
4093 def CMHSddd: NeonI_Scalar3Same_D_size<0b1, 0b00111, "cmhs">;
4094 def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vchs, CMHSddd>;
4096 // Scalar Compare Unsigned Higher
4097 def CMHIddd: NeonI_Scalar3Same_D_size<0b1, 0b00110, "cmhi">;
4098 def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vchi, CMHIddd>;
4100 // Scalar Compare Signed Greater Than
4101 def CMGTddd: NeonI_Scalar3Same_D_size<0b0, 0b00110, "cmgt">;
4102 def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vcgt, CMGTddd>;
4104 // Scalar Compare Bitwise Test Bits
4105 def CMTSTddd: NeonI_Scalar3Same_D_size<0b0, 0b10001, "cmtst">;
4106 def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vtstd, CMTSTddd>;
4108 // Scalar Compare Bitwise Equal To Zero
4109 def CMEQddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01001, "cmeq">;
4110 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vceq,
4113 // Scalar Compare Signed Greater Than Or Equal To Zero
4114 def CMGEddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b1, 0b01000, "cmge">;
4115 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcge,
4118 // Scalar Compare Signed Greater Than Zero
4119 def CMGTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01000, "cmgt">;
4120 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcgt,
4123 // Scalar Compare Signed Less Than Or Equal To Zero
4124 def CMLEddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b1, 0b01001, "cmle">;
4125 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vclez,
4128 // Scalar Compare Less Than Zero
4129 def CMLTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01010, "cmlt">;
4130 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcltz,
4133 // Scalar Floating-point Compare
4135 // Scalar Floating-point Compare Mask Equal
4136 defm FCMEQ: NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11100, "fcmeq">;
4137 defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vceq,
4138 FCMEQsss, FCMEQddd>;
4140 // Scalar Floating-point Compare Mask Equal To Zero
4141 defm FCMEQZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01101, "fcmeq">;
4142 defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vceq,
4143 FCMEQZssi, FCMEQZddi>;
4145 // Scalar Floating-point Compare Mask Greater Than Or Equal
4146 defm FCMGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11100, "fcmge">;
4147 defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vcge,
4148 FCMGEsss, FCMGEddd>;
4150 // Scalar Floating-point Compare Mask Greater Than Or Equal To Zero
4151 defm FCMGEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01100, "fcmge">;
4152 defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vcge,
4153 FCMGEZssi, FCMGEZddi>;
4155 // Scalar Floating-point Compare Mask Greater Than
4156 defm FCMGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11100, "fcmgt">;
4157 defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vcgt,
4158 FCMGTsss, FCMGTddd>;
4160 // Scalar Floating-point Compare Mask Greater Than Zero
4161 defm FCMGTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01100, "fcmgt">;
4162 defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vcgt,
4163 FCMGTZssi, FCMGTZddi>;
4165 // Scalar Floating-point Compare Mask Less Than Or Equal To Zero
4166 defm FCMLEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01101, "fcmle">;
4167 defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vclez,
4168 FCMLEZssi, FCMLEZddi>;
4170 // Scalar Floating-point Compare Mask Less Than Zero
4171 defm FCMLTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01110, "fcmlt">;
4172 defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vcltz,
4173 FCMLTZssi, FCMLTZddi>;
4175 // Scalar Floating-point Absolute Compare Mask Greater Than Or Equal
4176 defm FACGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11101, "facge">;
4177 defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vcage,
4178 FACGEsss, FACGEddd>;
4180 // Scalar Floating-point Absolute Compare Mask Greater Than
4181 defm FACGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11101, "facgt">;
4182 defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vcagt,
4183 FACGTsss, FACGTddd>;
4185 // Scalar Absolute Value
4186 defm ABS : NeonI_Scalar2SameMisc_D_size<0b0, 0b01011, "abs">;
4187 defm : Neon_Scalar2SameMisc_D_size_patterns<int_aarch64_neon_vabs, ABSdd>;
4189 // Scalar Signed Saturating Absolute Value
4190 defm SQABS : NeonI_Scalar2SameMisc_BHSD_size<0b0, 0b00111, "sqabs">;
4191 defm : Neon_Scalar2SameMisc_BHSD_size_patterns<int_arm_neon_vqabs,
4192 SQABSbb, SQABShh, SQABSss, SQABSdd>;
// Scalar Negate
4195 defm NEG : NeonI_Scalar2SameMisc_D_size<0b1, 0b01011, "neg">;
4196 defm : Neon_Scalar2SameMisc_D_size_patterns<int_aarch64_neon_vneg, NEGdd>;
4198 // Scalar Signed Saturating Negate
4199 defm SQNEG : NeonI_Scalar2SameMisc_BHSD_size<0b1, 0b00111, "sqneg">;
4200 defm : Neon_Scalar2SameMisc_BHSD_size_patterns<int_arm_neon_vqneg,
4201 SQNEGbb, SQNEGhh, SQNEGss, SQNEGdd>;
4203 // Scalar Signed Saturating Accumulated of Unsigned Value
4204 defm SUQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b0, 0b00011, "suqadd">;
4205 defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns<int_aarch64_neon_vuqadd,
4207 SUQADDss, SUQADDdd>;
4209 // Scalar Unsigned Saturating Accumulated of Signed Value
4210 defm USQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b1, 0b00011, "usqadd">;
4211 defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns<int_aarch64_neon_vsqadd,
4213 USQADDss, USQADDdd>;
4215 // Scalar Signed Saturating Extract Unsigned Narrow
4216 defm SQXTUN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10010, "sqxtun">;
4217 defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovnsu,
4221 // Scalar Signed Saturating Extract Narrow
4222 defm SQXTN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b0, 0b10100, "sqxtn">;
4223 defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovns,
4227 // Scalar Unsigned Saturating Extract Narrow
4228 defm UQXTN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10100, "uqxtn">;
4229 defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovnu,
4233 // Scalar Reduce Pairwise
4235 multiclass NeonI_ScalarPair_D_sizes<bit u, bit size, bits<5> opcode,
4236 string asmop, bit Commutable = 0> {
4237 let isCommutable = Commutable in {
4238 def _D_2D : NeonI_ScalarPair<u, {size, 0b1}, opcode,
4239 (outs FPR64:$Rd), (ins VPR128:$Rn),
4240 !strconcat(asmop, "\t$Rd, $Rn.2d"),
4246 multiclass NeonI_ScalarPair_SD_sizes<bit u, bit size, bits<5> opcode,
4247 string asmop, bit Commutable = 0>
4248 : NeonI_ScalarPair_D_sizes<u, size, opcode, asmop, Commutable> {
4249 let isCommutable = Commutable in {
4250 def _S_2S : NeonI_ScalarPair<u, {size, 0b0}, opcode,
4251 (outs FPR32:$Rd), (ins VPR64:$Rn),
4252 !strconcat(asmop, "\t$Rd, $Rn.2s"),
4258 // Scalar Reduce Addition Pairwise (Integer) with
4259 // Pattern to match llvm.arm.* intrinsic
4260 defm ADDPvv : NeonI_ScalarPair_D_sizes<0b0, 0b1, 0b11011, "addp", 0>;
4262 // Pattern to match llvm.aarch64.* intrinsic for
4263 // Scalar Reduce Addition Pairwise (Integer)
4264 def : Pat<(v1i64 (int_aarch64_neon_vpadd (v2i64 VPR128:$Rn))),
4265 (ADDPvv_D_2D VPR128:$Rn)>;
4267 // Scalar Reduce Addition Pairwise (Floating Point)
4268 defm FADDPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01101, "faddp", 0>;
4270 // Scalar Reduce Maximum Pairwise (Floating Point)
4271 defm FMAXPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01111, "fmaxp", 0>;
4273 // Scalar Reduce Minimum Pairwise (Floating Point)
4274 defm FMINPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01111, "fminp", 0>;
4276 // Scalar Reduce maxNum Pairwise (Floating Point)
4277 defm FMAXNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01100, "fmaxnmp", 0>;
4279 // Scalar Reduce minNum Pairwise (Floating Point)
4280 defm FMINNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01100, "fminnmp", 0>;
4282 multiclass Neon_ScalarPair_SD_size_patterns<SDPatternOperator opnodeS,
4283 SDPatternOperator opnodeD,
4285 Instruction INSTD> {
4286 def : Pat<(v1f32 (opnodeS (v2f32 VPR64:$Rn))),
4288 def : Pat<(v1f64 (opnodeD (v2f64 VPR128:$Rn))),
4289 (INSTD VPR128:$Rn)>;
4292 // Patterns to match llvm.aarch64.* intrinsic for
4293 // Scalar Reduce Add, Max, Min, MaxNum, MinNum Pairwise (Floating Point)
4294 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfadd,
4295 int_aarch64_neon_vpfaddq, FADDPvv_S_2S, FADDPvv_D_2D>;
4297 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmax,
4298 int_aarch64_neon_vpmaxq, FMAXPvv_S_2S, FMAXPvv_D_2D>;
4300 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmin,
4301 int_aarch64_neon_vpminq, FMINPvv_S_2S, FMINPvv_D_2D>;
4303 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfmaxnm,
4304 int_aarch64_neon_vpfmaxnmq, FMAXNMPvv_S_2S, FMAXNMPvv_D_2D>;
4306 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfminnm,
4307 int_aarch64_neon_vpfminnmq, FMINNMPvv_S_2S, FMINNMPvv_D_2D>;
4311 //===----------------------------------------------------------------------===//
4312 // Non-Instruction Patterns
4313 //===----------------------------------------------------------------------===//
4315 // 64-bit vector bitcasts...
// Bitconverts between equal-width vector types are free: source and result
// live in the same 64-bit SIMD register, so each pattern simply reinterprets
// the register and no instruction is emitted.
4317 def : Pat<(v1i64 (bitconvert (v8i8 VPR64:$src))), (v1i64 VPR64:$src)>;
4318 def : Pat<(v2f32 (bitconvert (v8i8 VPR64:$src))), (v2f32 VPR64:$src)>;
4319 def : Pat<(v2i32 (bitconvert (v8i8 VPR64:$src))), (v2i32 VPR64:$src)>;
4320 def : Pat<(v4i16 (bitconvert (v8i8 VPR64:$src))), (v4i16 VPR64:$src)>;
4322 def : Pat<(v1i64 (bitconvert (v4i16 VPR64:$src))), (v1i64 VPR64:$src)>;
4323 def : Pat<(v2i32 (bitconvert (v4i16 VPR64:$src))), (v2i32 VPR64:$src)>;
4324 def : Pat<(v2f32 (bitconvert (v4i16 VPR64:$src))), (v2f32 VPR64:$src)>;
4325 def : Pat<(v8i8 (bitconvert (v4i16 VPR64:$src))), (v8i8 VPR64:$src)>;
4327 def : Pat<(v1i64 (bitconvert (v2i32 VPR64:$src))), (v1i64 VPR64:$src)>;
4328 def : Pat<(v2f32 (bitconvert (v2i32 VPR64:$src))), (v2f32 VPR64:$src)>;
4329 def : Pat<(v4i16 (bitconvert (v2i32 VPR64:$src))), (v4i16 VPR64:$src)>;
4330 def : Pat<(v8i8 (bitconvert (v2i32 VPR64:$src))), (v8i8 VPR64:$src)>;
4332 def : Pat<(v1i64 (bitconvert (v2f32 VPR64:$src))), (v1i64 VPR64:$src)>;
4333 def : Pat<(v2i32 (bitconvert (v2f32 VPR64:$src))), (v2i32 VPR64:$src)>;
4334 def : Pat<(v4i16 (bitconvert (v2f32 VPR64:$src))), (v4i16 VPR64:$src)>;
4335 def : Pat<(v8i8 (bitconvert (v2f32 VPR64:$src))), (v8i8 VPR64:$src)>;
4337 def : Pat<(v2f32 (bitconvert (v1i64 VPR64:$src))), (v2f32 VPR64:$src)>;
4338 def : Pat<(v2i32 (bitconvert (v1i64 VPR64:$src))), (v2i32 VPR64:$src)>;
4339 def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>;
4340 def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>;
4342 // ..and 128-bit vector bitcasts...
// Same as above for the 128-bit register file: reinterpret VPR128 in place,
// one pattern per ordered (destination, source) type pair.
4344 def : Pat<(v2f64 (bitconvert (v16i8 VPR128:$src))), (v2f64 VPR128:$src)>;
4345 def : Pat<(v2i64 (bitconvert (v16i8 VPR128:$src))), (v2i64 VPR128:$src)>;
4346 def : Pat<(v4f32 (bitconvert (v16i8 VPR128:$src))), (v4f32 VPR128:$src)>;
4347 def : Pat<(v4i32 (bitconvert (v16i8 VPR128:$src))), (v4i32 VPR128:$src)>;
4348 def : Pat<(v8i16 (bitconvert (v16i8 VPR128:$src))), (v8i16 VPR128:$src)>;
4350 def : Pat<(v2f64 (bitconvert (v8i16 VPR128:$src))), (v2f64 VPR128:$src)>;
4351 def : Pat<(v2i64 (bitconvert (v8i16 VPR128:$src))), (v2i64 VPR128:$src)>;
4352 def : Pat<(v4i32 (bitconvert (v8i16 VPR128:$src))), (v4i32 VPR128:$src)>;
4353 def : Pat<(v4f32 (bitconvert (v8i16 VPR128:$src))), (v4f32 VPR128:$src)>;
4354 def : Pat<(v16i8 (bitconvert (v8i16 VPR128:$src))), (v16i8 VPR128:$src)>;
4356 def : Pat<(v2f64 (bitconvert (v4i32 VPR128:$src))), (v2f64 VPR128:$src)>;
4357 def : Pat<(v2i64 (bitconvert (v4i32 VPR128:$src))), (v2i64 VPR128:$src)>;
4358 def : Pat<(v4f32 (bitconvert (v4i32 VPR128:$src))), (v4f32 VPR128:$src)>;
4359 def : Pat<(v8i16 (bitconvert (v4i32 VPR128:$src))), (v8i16 VPR128:$src)>;
4360 def : Pat<(v16i8 (bitconvert (v4i32 VPR128:$src))), (v16i8 VPR128:$src)>;
4362 def : Pat<(v2f64 (bitconvert (v4f32 VPR128:$src))), (v2f64 VPR128:$src)>;
4363 def : Pat<(v2i64 (bitconvert (v4f32 VPR128:$src))), (v2i64 VPR128:$src)>;
4364 def : Pat<(v4i32 (bitconvert (v4f32 VPR128:$src))), (v4i32 VPR128:$src)>;
4365 def : Pat<(v8i16 (bitconvert (v4f32 VPR128:$src))), (v8i16 VPR128:$src)>;
4366 def : Pat<(v16i8 (bitconvert (v4f32 VPR128:$src))), (v16i8 VPR128:$src)>;
4368 def : Pat<(v2f64 (bitconvert (v2i64 VPR128:$src))), (v2f64 VPR128:$src)>;
4369 def : Pat<(v4f32 (bitconvert (v2i64 VPR128:$src))), (v4f32 VPR128:$src)>;
4370 def : Pat<(v4i32 (bitconvert (v2i64 VPR128:$src))), (v4i32 VPR128:$src)>;
4371 def : Pat<(v8i16 (bitconvert (v2i64 VPR128:$src))), (v8i16 VPR128:$src)>;
4372 def : Pat<(v16i8 (bitconvert (v2i64 VPR128:$src))), (v16i8 VPR128:$src)>;
4374 def : Pat<(v2i64 (bitconvert (v2f64 VPR128:$src))), (v2i64 VPR128:$src)>;
4375 def : Pat<(v4f32 (bitconvert (v2f64 VPR128:$src))), (v4f32 VPR128:$src)>;
4376 def : Pat<(v4i32 (bitconvert (v2f64 VPR128:$src))), (v4i32 VPR128:$src)>;
4377 def : Pat<(v8i16 (bitconvert (v2f64 VPR128:$src))), (v8i16 VPR128:$src)>;
4378 def : Pat<(v16i8 (bitconvert (v2f64 VPR128:$src))), (v16i8 VPR128:$src)>;
4381 // ...and scalar bitcasts...
// Scalar <-> single-element-vector reinterpretations within the FP register
// file are likewise free.
4382 def : Pat<(f16 (bitconvert (v1i16 FPR16:$src))), (f16 FPR16:$src)>;
4383 def : Pat<(f32 (bitconvert (v1i32 FPR32:$src))), (f32 FPR32:$src)>;
4384 def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>;
4385 def : Pat<(f32 (bitconvert (v1f32 FPR32:$src))), (f32 FPR32:$src)>;
4386 def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>;
// Crossing from the FP/SIMD register file into the general-purpose file
// needs a real move instruction (FMOVxd: D->X, FMOVws: S->W).
4388 def : Pat<(i64 (bitconvert (v1i64 FPR64:$src))), (FMOVxd $src)>;
4389 def : Pat<(i32 (bitconvert (v1i32 FPR32:$src))), (FMOVws $src)>;
4391 def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>;
4392 def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>;
4393 def : Pat<(v2i32 (bitconvert (v1i64 VPR64:$src))), (v2i32 VPR64:$src)>;
4395 def : Pat<(f64 (bitconvert (v8i8 VPR64:$src))), (f64 VPR64:$src)>;
4396 def : Pat<(f64 (bitconvert (v4i16 VPR64:$src))), (f64 VPR64:$src)>;
4397 def : Pat<(f64 (bitconvert (v2i32 VPR64:$src))), (f64 VPR64:$src)>;
4398 def : Pat<(f64 (bitconvert (v2f32 VPR64:$src))), (f64 VPR64:$src)>;
4399 def : Pat<(f64 (bitconvert (v1i64 VPR64:$src))), (f64 VPR64:$src)>;
4401 def : Pat<(f128 (bitconvert (v16i8 VPR128:$src))), (f128 VPR128:$src)>;
4402 def : Pat<(f128 (bitconvert (v8i16 VPR128:$src))), (f128 VPR128:$src)>;
4403 def : Pat<(f128 (bitconvert (v4i32 VPR128:$src))), (f128 VPR128:$src)>;
4404 def : Pat<(f128 (bitconvert (v2i64 VPR128:$src))), (f128 VPR128:$src)>;
4405 def : Pat<(f128 (bitconvert (v4f32 VPR128:$src))), (f128 VPR128:$src)>;
4406 def : Pat<(f128 (bitconvert (v2f64 VPR128:$src))), (f128 VPR128:$src)>;
4408 def : Pat<(v1i16 (bitconvert (f16 FPR16:$src))), (v1i16 FPR16:$src)>;
4409 def : Pat<(v1i32 (bitconvert (f32 FPR32:$src))), (v1i32 FPR32:$src)>;
4410 def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
4411 def : Pat<(v1f32 (bitconvert (f32 FPR32:$src))), (v1f32 FPR32:$src)>;
4412 def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>;
// ...and the opposite direction GPR -> FP/SIMD (FMOVdx: X->D, FMOVsw: W->S).
4414 def : Pat<(v1i64 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
4415 def : Pat<(v1i32 (bitconvert (i32 GPR32:$src))), (FMOVsw $src)>;
4417 def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
4418 def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
4419 def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
4420 def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
4421 def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
4423 def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>;
4424 def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), (v8i16 FPR128:$src)>;
4425 def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>;
4426 def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>;
4427 def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>;
4428 def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>;
// "Bare" unsigned-immediate operands used as lane indices: printed without
// the usual '#' prefix. Only uimm0's ImmLeaf actually restricts the value
// (must be 0); the others accept any immediate at the ImmLeaf level.
// NOTE(review): range checking for uimm1-4 presumably happens via the
// ParserMatchClass and the patterns that use them -- confirm.
4430 def neon_uimm0_bare : Operand<i64>,
4431 ImmLeaf<i64, [{return Imm == 0;}]> {
4432 let ParserMatchClass = neon_uimm0_asmoperand;
4433 let PrintMethod = "printNeonUImm8OperandBare";
4436 def neon_uimm1_bare : Operand<i64>,
4437 ImmLeaf<i64, [{(void)Imm; return true;}]> {
4438 let ParserMatchClass = neon_uimm1_asmoperand;
4439 let PrintMethod = "printNeonUImm8OperandBare";
4442 def neon_uimm2_bare : Operand<i64>,
4443 ImmLeaf<i64, [{(void)Imm; return true;}]> {
4444 let ParserMatchClass = neon_uimm2_asmoperand;
4445 let PrintMethod = "printNeonUImm8OperandBare";
// NOTE(review): uimm3/uimm4 reuse the generic uimm3/uimm4 asmoperands rather
// than neon_-specific ones -- looks intentional, but verify parser behavior.
4448 def neon_uimm3_bare : Operand<i64>,
4449 ImmLeaf<i64, [{(void)Imm; return true;}]> {
4450 let ParserMatchClass = uimm3_asmoperand;
4451 let PrintMethod = "printNeonUImm8OperandBare";
4454 def neon_uimm4_bare : Operand<i64>,
4455 ImmLeaf<i64, [{(void)Imm; return true;}]> {
4456 let ParserMatchClass = uimm4_asmoperand;
4457 let PrintMethod = "printNeonUImm8OperandBare";
// INS (general): insert a general-purpose register value into one element of
// a 128-bit vector, selected by immediate $Imm. $src is tied to $Rd so the
// untouched elements are preserved.
// NOTE(review): the tail of the vector_insert pattern (original lines
// 4468-4471) is not visible in this view.
4460 class NeonI_INS_main<string asmop, string Res, ValueType ResTy,
4461 RegisterClass OpGPR, ValueType OpTy, Operand OpImm>
4462 : NeonI_copy<0b1, 0b0, 0b0011,
4463 (outs VPR128:$Rd), (ins VPR128:$src, OpGPR:$Rn, OpImm:$Imm),
4464 asmop # "\t$Rd." # Res # "[$Imm], $Rn",
4465 [(set (ResTy VPR128:$Rd),
4466 (ResTy (vector_insert
4467 (ResTy VPR128:$src),
// Partial-update semantics: result register doubles as the input vector.
4472 let Constraints = "$src = $Rd";
4475 // The following are for instruction class (3V Elem)
// Base class for 3-operand vector-by-element instructions with an
// accumulator: Rd = op(Rd/src, Rn, Re[Index]). The lane index bits are
// assigned per-size in the multiclasses that instantiate this.
4479 class NI_2VE<bit q, bit u, bits<2> size, bits<4> opcode,
4480 string asmop, string ResS, string OpS, string EleOpS,
4481 Operand OpImm, RegisterOperand ResVPR,
4482 RegisterOperand OpVPR, RegisterOperand EleOpVPR>
4483 : NeonI_2VElem<q, u, size, opcode,
4484 (outs ResVPR:$Rd), (ins ResVPR:$src, OpVPR:$Rn,
4485 EleOpVPR:$Re, OpImm:$Index),
4486 asmop # "\t$Rd." # ResS # ", $Rn." # OpS #
4487 ", $Re." # EleOpS # "[$Index]",
// Accumulating form: destination is read-modify-write.
4493 let Constraints = "$src = $Rd";
// Variant 1: integer accumulate-by-element in 2S/4S/4H/8H shapes. The lane
// index is scattered into the encoding (H:L bits); for S elements Index is
// 2 bits, for H elements 3 bits.
4496 multiclass NI_2VE_v1<bit u, bits<4> opcode, string asmop> {
4497 // vector register class for element is always 128-bit to cover the max index
4498 def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
4499 neon_uimm2_bare, VPR64, VPR64, VPR128> {
4500 let Inst{11} = {Index{1}};
4501 let Inst{21} = {Index{0}};
4502 let Inst{20-16} = Re;
4505 def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
4506 neon_uimm2_bare, VPR128, VPR128, VPR128> {
4507 let Inst{11} = {Index{1}};
4508 let Inst{21} = {Index{0}};
4509 let Inst{20-16} = Re;
4512 // Index operations on 16-bit(H) elements are restricted to using v0-v15.
4513 def _4h8h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h",
4514 neon_uimm3_bare, VPR64, VPR64, VPR128Lo> {
4515 let Inst{11} = {Index{2}};
4516 let Inst{21} = {Index{1}};
4517 let Inst{20} = {Index{0}};
// Only 4 bits of Re: H-element forms can address v0-v15 only (VPR128Lo).
4518 let Inst{19-16} = Re{3-0};
4521 def _8h8h : NI_2VE<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h",
4522 neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
4523 let Inst{11} = {Index{2}};
4524 let Inst{21} = {Index{1}};
4525 let Inst{20} = {Index{0}};
4526 let Inst{19-16} = Re{3-0};
// Multiply-accumulate / multiply-subtract by element.
4530 defm MLAvve : NI_2VE_v1<0b1, 0b0000, "mla">;
4531 defm MLSvve : NI_2VE_v1<0b1, 0b0100, "mls">;
4533 // Pattern for lane in 128-bit vector
// Selection pattern for the accumulating by-element ops when the element
// source is already a 128-bit register: use it directly.
4534 class NI_2VE_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
4535 RegisterOperand ResVPR, RegisterOperand OpVPR,
4536 RegisterOperand EleOpVPR, ValueType ResTy, ValueType OpTy,
4537 ValueType EleOpTy, SDPatternOperator coreop>
4538 : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn),
4539 (OpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
4540 (INST ResVPR:$src, OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
4542 // Pattern for lane in 64-bit vector
// Same, but the element source is a 64-bit register: widen it to 128 bits
// with SUBREG_TO_REG, since the instruction encodes a 128-bit Re.
4543 class NI_2VE_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
4544 RegisterOperand ResVPR, RegisterOperand OpVPR,
4545 RegisterOperand EleOpVPR, ValueType ResTy, ValueType OpTy,
4546 ValueType EleOpTy, SDPatternOperator coreop>
4547 : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn),
4548 (OpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
4549 (INST ResVPR:$src, OpVPR:$Rn,
4550 (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
// Selection patterns for the integer accumulate-by-element instructions:
// match (op acc, Rn, dup(lane)) in each shape. The coreop fragment builds
// the duplicated-lane operand, taking the low half where needed.
4552 multiclass NI_2VE_v1_pat<string subop, SDPatternOperator op>
4554 def : NI_2VE_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
4555 op, VPR64, VPR64, VPR128, v2i32, v2i32, v4i32,
4556 BinOpFrag<(Neon_vduplane
4557 (Neon_low4S node:$LHS), node:$RHS)>>;
4559 def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
4560 op, VPR128, VPR128, VPR128, v4i32, v4i32, v4i32,
4561 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4563 def : NI_2VE_laneq<!cast<Instruction>(subop # "_4h8h"), neon_uimm3_bare,
4564 op, VPR64, VPR64, VPR128Lo, v4i16, v4i16, v8i16,
4565 BinOpFrag<(Neon_vduplane
4566 (Neon_low8H node:$LHS), node:$RHS)>>;
4568 def : NI_2VE_laneq<!cast<Instruction>(subop # "_8h8h"), neon_uimm3_bare,
4569 op, VPR128, VPR128, VPR128Lo, v8i16, v8i16, v8i16,
4570 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4572 // Index can only be half of the max value for lane in 64-bit vector
4574 def : NI_2VE_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
4575 op, VPR64, VPR64, VPR64, v2i32, v2i32, v2i32,
4576 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
// 64-bit element vector consumed by a 128-bit op: combine with undef high.
4578 def : NI_2VE_lane<!cast<Instruction>(subop # "_4s4s"), neon_uimm1_bare,
4579 op, VPR128, VPR128, VPR64, v4i32, v4i32, v2i32,
4580 BinOpFrag<(Neon_vduplane
4581 (Neon_combine_4S node:$LHS, undef),
4584 def : NI_2VE_lane<!cast<Instruction>(subop # "_4h8h"), neon_uimm2_bare,
4585 op, VPR64, VPR64, VPR64Lo, v4i16, v4i16, v4i16,
4586 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4588 def : NI_2VE_lane<!cast<Instruction>(subop # "_8h8h"), neon_uimm2_bare,
4589 op, VPR128, VPR128, VPR64Lo, v8i16, v8i16, v4i16,
4590 BinOpFrag<(Neon_vduplane
4591 (Neon_combine_8H node:$LHS, undef),
// Wire up MLA/MLS to the generic mla/mls DAG fragments.
4595 defm MLA_lane_v1 : NI_2VE_v1_pat<"MLAvve", Neon_mla>;
4596 defm MLS_lane_v1 : NI_2VE_v1_pat<"MLSvve", Neon_mls>;
// Two-operand (non-accumulating) by-element base class:
// Rd = op(Rn, Re[Index]). Identical to NI_2VE but without the tied $src.
4598 class NI_2VE_2op<bit q, bit u, bits<2> size, bits<4> opcode,
4599 string asmop, string ResS, string OpS, string EleOpS,
4600 Operand OpImm, RegisterOperand ResVPR,
4601 RegisterOperand OpVPR, RegisterOperand EleOpVPR>
4602 : NeonI_2VElem<q, u, size, opcode,
4603 (outs ResVPR:$Rd), (ins OpVPR:$Rn,
4604 EleOpVPR:$Re, OpImm:$Index),
4605 asmop # "\t$Rd." # ResS # ", $Rn." # OpS #
4606 ", $Re." # EleOpS # "[$Index]",
// Variant 1, two-operand form: same 2S/4S/4H/8H shapes and index encoding
// as NI_2VE_v1, without the accumulator.
4613 multiclass NI_2VE_v1_2op<bit u, bits<4> opcode, string asmop> {
4614 // vector register class for element is always 128-bit to cover the max index
4615 def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
4616 neon_uimm2_bare, VPR64, VPR64, VPR128> {
4617 let Inst{11} = {Index{1}};
4618 let Inst{21} = {Index{0}};
4619 let Inst{20-16} = Re;
4622 def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
4623 neon_uimm2_bare, VPR128, VPR128, VPR128> {
4624 let Inst{11} = {Index{1}};
4625 let Inst{21} = {Index{0}};
4626 let Inst{20-16} = Re;
4629 // Index operations on 16-bit(H) elements are restricted to using v0-v15.
4630 def _4h8h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h",
4631 neon_uimm3_bare, VPR64, VPR64, VPR128Lo> {
4632 let Inst{11} = {Index{2}};
4633 let Inst{21} = {Index{1}};
4634 let Inst{20} = {Index{0}};
4635 let Inst{19-16} = Re{3-0};
4638 def _8h8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h",
4639 neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
4640 let Inst{11} = {Index{2}};
4641 let Inst{21} = {Index{1}};
4642 let Inst{20} = {Index{0}};
4643 let Inst{19-16} = Re{3-0};
// Multiply and saturating-doubling-multiply-high by element.
4647 defm MULve : NI_2VE_v1_2op<0b0, 0b1000, "mul">;
4648 defm SQDMULHve : NI_2VE_v1_2op<0b0, 0b1100, "sqdmulh">;
4649 defm SQRDMULHve : NI_2VE_v1_2op<0b0, 0b1101, "sqrdmulh">;
4651 // Pattern for lane in 128-bit vector
// Two-operand analogue of NI_2VE_laneq: no accumulator operand to match.
4652 class NI_2VE_mul_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
4653 RegisterOperand OpVPR, RegisterOperand EleOpVPR,
4654 ValueType ResTy, ValueType OpTy, ValueType EleOpTy,
4655 SDPatternOperator coreop>
4656 : Pat<(ResTy (op (OpTy OpVPR:$Rn),
4657 (OpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
4658 (INST OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
4660 // Pattern for lane in 64-bit vector
// 64-bit element source: widen to 128 bits via SUBREG_TO_REG before use.
4661 class NI_2VE_mul_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
4662 RegisterOperand OpVPR, RegisterOperand EleOpVPR,
4663 ValueType ResTy, ValueType OpTy, ValueType EleOpTy,
4664 SDPatternOperator coreop>
4665 : Pat<(ResTy (op (OpTy OpVPR:$Rn),
4666 (OpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
4668 (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
// Selection patterns for the two-operand integer by-element multiplies,
// mirroring NI_2VE_v1_pat shape-for-shape.
4670 multiclass NI_2VE_mul_v1_pat<string subop, SDPatternOperator op> {
4671 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
4672 op, VPR64, VPR128, v2i32, v2i32, v4i32,
4673 BinOpFrag<(Neon_vduplane
4674 (Neon_low4S node:$LHS), node:$RHS)>>;
4676 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
4677 op, VPR128, VPR128, v4i32, v4i32, v4i32,
4678 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4680 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4h8h"), neon_uimm3_bare,
4681 op, VPR64, VPR128Lo, v4i16, v4i16, v8i16,
4682 BinOpFrag<(Neon_vduplane
4683 (Neon_low8H node:$LHS), node:$RHS)>>;
4685 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_8h8h"), neon_uimm3_bare,
4686 op, VPR128, VPR128Lo, v8i16, v8i16, v8i16,
4687 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4689 // Index can only be half of the max value for lane in 64-bit vector
4691 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
4692 op, VPR64, VPR64, v2i32, v2i32, v2i32,
4693 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4695 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4s4s"), neon_uimm1_bare,
4696 op, VPR128, VPR64, v4i32, v4i32, v2i32,
4697 BinOpFrag<(Neon_vduplane
4698 (Neon_combine_4S node:$LHS, undef),
4701 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4h8h"), neon_uimm2_bare,
4702 op, VPR64, VPR64Lo, v4i16, v4i16, v4i16,
4703 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4705 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_8h8h"), neon_uimm2_bare,
4706 op, VPR128, VPR64Lo, v8i16, v8i16, v4i16,
4707 BinOpFrag<(Neon_vduplane
4708 (Neon_combine_8H node:$LHS, undef),
// MUL uses the generic ISD mul; the saturating highs use ARM intrinsics.
4712 defm MUL_lane_v1 : NI_2VE_mul_v1_pat<"MULve", mul>;
4713 defm SQDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQDMULHve", int_arm_neon_vqdmulh>;
4714 defm SQRDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQRDMULHve", int_arm_neon_vqrdmulh>;
// Variant 2, two-operand form: floating-point shapes 2S/4S/2D. There is no
// 1D form; the 2D form has a 1-bit lane index.
4718 multiclass NI_2VE_v2_2op<bit u, bits<4> opcode, string asmop> {
4719 // vector register class for element is always 128-bit to cover the max index
4720 def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
4721 neon_uimm2_bare, VPR64, VPR64, VPR128> {
4722 let Inst{11} = {Index{1}};
4723 let Inst{21} = {Index{0}};
4724 let Inst{20-16} = Re;
4727 def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
4728 neon_uimm2_bare, VPR128, VPR128, VPR128> {
4729 let Inst{11} = {Index{1}};
4730 let Inst{21} = {Index{0}};
4731 let Inst{20-16} = Re;
4734 // _1d2d doesn't exist!
4736 def _2d2d : NI_2VE_2op<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d",
4737 neon_uimm1_bare, VPR128, VPR128, VPR128> {
4738 let Inst{11} = {Index{0}};
4740 let Inst{20-16} = Re;
// FP multiply / multiply-extended by element.
4744 defm FMULve : NI_2VE_v2_2op<0b0, 0b1001, "fmul">;
4745 defm FMULXve : NI_2VE_v2_2op<0b1, 0b1001, "fmulx">;
// 2D-by-scalar special case: the v1f64 element is duplicated by combining
// the register with itself (coreop), so lane 0 of the widened register is
// always the right element and the instruction is emitted with index 0.
4747 class NI_2VE_mul_lane_2d<Instruction INST, Operand OpImm, SDPatternOperator op,
4748 RegisterOperand OpVPR, RegisterOperand EleOpVPR,
4749 ValueType ResTy, ValueType OpTy, ValueType EleOpTy,
4750 SDPatternOperator coreop>
4751 : Pat<(ResTy (op (OpTy OpVPR:$Rn),
4752 (OpTy (coreop (EleOpTy EleOpVPR:$Re), (EleOpTy EleOpVPR:$Re))))),
4754 (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), 0)>;
// Selection patterns for the FP by-element multiplies (2S/4S/2D shapes).
4756 multiclass NI_2VE_mul_v2_pat<string subop, SDPatternOperator op> {
4757 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
4758 op, VPR64, VPR128, v2f32, v2f32, v4f32,
4759 BinOpFrag<(Neon_vduplane
4760 (Neon_low4f node:$LHS), node:$RHS)>>;
4762 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
4763 op, VPR128, VPR128, v4f32, v4f32, v4f32,
4764 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4766 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2d2d"), neon_uimm1_bare,
4767 op, VPR128, VPR128, v2f64, v2f64, v2f64,
4768 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4770 // Index can only be half of the max value for lane in 64-bit vector
4772 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
4773 op, VPR64, VPR64, v2f32, v2f32, v2f32,
4774 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4776 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4s4s"), neon_uimm1_bare,
4777 op, VPR128, VPR64, v4f32, v4f32, v2f32,
4778 BinOpFrag<(Neon_vduplane
4779 (Neon_combine_4f node:$LHS, undef),
// v1f64 element: handled by the self-combine trick (see NI_2VE_mul_lane_2d).
4782 def : NI_2VE_mul_lane_2d<!cast<Instruction>(subop # "_2d2d"), neon_uimm1_bare,
4783 op, VPR128, VPR64, v2f64, v2f64, v1f64,
4784 BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>;
// FMUL maps to generic fmul; FMULX only exists as an AArch64 intrinsic.
4787 defm FMUL_lane_v2 : NI_2VE_mul_v2_pat<"FMULve", fmul>;
4788 defm FMULX_lane_v2 : NI_2VE_mul_v2_pat<"FMULXve", int_aarch64_neon_vmulx>;
4790 // The following are patterns using fma
4791 // -ffp-contract=fast generates fma
// Variant 2, accumulating form: FP fused multiply-add/subtract by element,
// 2S/4S/2D shapes (no 1D form), same index encoding as NI_2VE_v2_2op.
4793 multiclass NI_2VE_v2<bit u, bits<4> opcode, string asmop> {
4794 // vector register class for element is always 128-bit to cover the max index
4795 def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
4796 neon_uimm2_bare, VPR64, VPR64, VPR128> {
4797 let Inst{11} = {Index{1}};
4798 let Inst{21} = {Index{0}};
4799 let Inst{20-16} = Re;
4802 def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
4803 neon_uimm2_bare, VPR128, VPR128, VPR128> {
4804 let Inst{11} = {Index{1}};
4805 let Inst{21} = {Index{0}};
4806 let Inst{20-16} = Re;
4809 // _1d2d doesn't exist!
4811 def _2d2d : NI_2VE<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d",
4812 neon_uimm1_bare, VPR128, VPR128, VPR128> {
4813 let Inst{11} = {Index{0}};
4815 let Inst{20-16} = Re;
4819 defm FMLAvve : NI_2VE_v2<0b0, 0b0001, "fmla">;
4820 defm FMLSvve : NI_2VE_v2<0b0, 0b0101, "fmls">;
4822 // Pattern for lane in 128-bit vector
// "swap" pattern classes: ISD::FMA puts the multiplicands first and the
// addend last, whereas the instruction reads the accumulator first, so the
// operand order is swapped relative to NI_2VE_laneq/lane.
4823 class NI_2VEswap_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
4824 RegisterOperand ResVPR, RegisterOperand OpVPR,
4825 ValueType ResTy, ValueType OpTy,
4826 SDPatternOperator coreop>
4827 : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))),
4828 (ResTy ResVPR:$src), (ResTy ResVPR:$Rn))),
4829 (INST ResVPR:$src, ResVPR:$Rn, OpVPR:$Re, OpImm:$Index)>;
4831 // Pattern for lane in 64-bit vector
// 64-bit element register: widen with SUBREG_TO_REG before emitting.
4832 class NI_2VEswap_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
4833 RegisterOperand ResVPR, RegisterOperand OpVPR,
4834 ValueType ResTy, ValueType OpTy,
4835 SDPatternOperator coreop>
4836 : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))),
4837 (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))),
4838 (INST ResVPR:$src, ResVPR:$Rn,
4839 (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), OpImm:$Index)>;
4841 // Pattern for lane in 64-bit vector
// 2D2D variant of the above: v1f64 element duplicated via self-combine,
// emitted with lane index 0 (cf. NI_2VE_mul_lane_2d).
4842 class NI_2VEswap_lane_2d2d<Instruction INST, Operand OpImm,
4843 SDPatternOperator op,
4844 RegisterOperand ResVPR, RegisterOperand OpVPR,
4845 ValueType ResTy, ValueType OpTy,
4846 SDPatternOperator coreop>
4847 : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (OpTy OpVPR:$Re))),
4848 (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))),
4849 (INST ResVPR:$src, ResVPR:$Rn,
4850 (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), 0)>;
// FMLA selection: match (fma dup(lane), acc, Rn) across 2S/4S/2D shapes.
4853 multiclass NI_2VE_fma_v2_pat<string subop, SDPatternOperator op> {
4854 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
4855 neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
4856 BinOpFrag<(Neon_vduplane
4857 (Neon_low4f node:$LHS), node:$RHS)>>;
4859 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
4860 neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
4861 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4863 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
4864 neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
4865 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4867 // Index can only be half of the max value for lane in 64-bit vector
4869 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
4870 neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
4871 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4873 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_4s4s"),
4874 neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32,
4875 BinOpFrag<(Neon_vduplane
4876 (Neon_combine_4f node:$LHS, undef),
4879 def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
4880 neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
4881 BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>;
4884 defm FMLA_lane_v2_s : NI_2VE_fma_v2_pat<"FMLAvve", fma>;
// FMLS selection: FMLS is fma with a negated multiplicand, and the fneg can
// legally sit either outside the lane-duplicate or on the element before
// duplication, so each shape needs two patterns covering both placements.
4886 multiclass NI_2VE_fms_v2_pat<string subop, SDPatternOperator op>
4888 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
4889 neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
4890 BinOpFrag<(fneg (Neon_vduplane
4891 (Neon_low4f node:$LHS), node:$RHS))>>;
4893 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
4894 neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
4895 BinOpFrag<(Neon_vduplane
4896 (Neon_low4f (fneg node:$LHS)),
4899 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
4900 neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
4901 BinOpFrag<(fneg (Neon_vduplane
4902 node:$LHS, node:$RHS))>>;
4904 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
4905 neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
4906 BinOpFrag<(Neon_vduplane
4907 (fneg node:$LHS), node:$RHS)>>;
4909 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
4910 neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
4911 BinOpFrag<(fneg (Neon_vduplane
4912 node:$LHS, node:$RHS))>>;
4914 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
4915 neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
4916 BinOpFrag<(Neon_vduplane
4917 (fneg node:$LHS), node:$RHS)>>;
4919 // Index can only be half of the max value for lane in 64-bit vector
4921 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
4922 neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
4923 BinOpFrag<(fneg (Neon_vduplane
4924 node:$LHS, node:$RHS))>>;
4926 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
4927 neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
4928 BinOpFrag<(Neon_vduplane
4929 (fneg node:$LHS), node:$RHS)>>;
4931 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_4s4s"),
4932 neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32,
4933 BinOpFrag<(fneg (Neon_vduplane
4934 (Neon_combine_4f node:$LHS, undef),
4937 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_4s4s"),
4938 neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32,
4939 BinOpFrag<(Neon_vduplane
4940 (Neon_combine_4f (fneg node:$LHS), undef),
4943 def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
4944 neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
4945 BinOpFrag<(fneg (Neon_combine_2d
4946 node:$LHS, node:$RHS))>>;
4948 def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
4949 neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
4950 BinOpFrag<(Neon_combine_2d
4951 (fneg node:$LHS), (fneg node:$RHS))>>;
4954 defm FMLS_lane_v2_s : NI_2VE_fms_v2_pat<"FMLSvve", fma>;
4956 // Variant 3: Long type
4957 // E.g. SMLAL : 4S/4H/H (v0-v15), 2D/2S/S
4958 // SMLAL2: 4S/8H/H (v0-v15), 2D/4S/S
// Accumulating widening by-element ops: result elements are twice the width
// of the sources. The q=1 forms are the "2" (high-half) variants and append
// "2" to the mnemonic.
4960 multiclass NI_2VE_v3<bit u, bits<4> opcode, string asmop> {
4961 // vector register class for element is always 128-bit to cover the max index
4962 def _2d2s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s",
4963 neon_uimm2_bare, VPR128, VPR64, VPR128> {
4964 let Inst{11} = {Index{1}};
4965 let Inst{21} = {Index{0}};
4966 let Inst{20-16} = Re;
4969 def _2d4s : NI_2VE<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s",
4970 neon_uimm2_bare, VPR128, VPR128, VPR128> {
4971 let Inst{11} = {Index{1}};
4972 let Inst{21} = {Index{0}};
4973 let Inst{20-16} = Re;
4976 // Index operations on 16-bit(H) elements are restricted to using v0-v15.
4977 def _4s8h : NI_2VE<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h",
4978 neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
4979 let Inst{11} = {Index{2}};
4980 let Inst{21} = {Index{1}};
4981 let Inst{20} = {Index{0}};
4982 let Inst{19-16} = Re{3-0};
4985 def _4s4h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h",
4986 neon_uimm3_bare, VPR128, VPR64, VPR128Lo> {
4987 let Inst{11} = {Index{2}};
4988 let Inst{21} = {Index{1}};
4989 let Inst{20} = {Index{0}};
4990 let Inst{19-16} = Re{3-0};
// Widening multiply-accumulate/subtract, signed/unsigned and saturating.
4994 defm SMLALvve : NI_2VE_v3<0b0, 0b0010, "smlal">;
4995 defm UMLALvve : NI_2VE_v3<0b1, 0b0010, "umlal">;
4996 defm SMLSLvve : NI_2VE_v3<0b0, 0b0110, "smlsl">;
4997 defm UMLSLvve : NI_2VE_v3<0b1, 0b0110, "umlsl">;
4998 defm SQDMLALvve : NI_2VE_v3<0b0, 0b0011, "sqdmlal">;
4999 defm SQDMLSLvve : NI_2VE_v3<0b0, 0b0111, "sqdmlsl">;
// Two-operand (non-accumulating) widening by-element ops, same shapes and
// encodings as NI_2VE_v3.
5001 multiclass NI_2VE_v3_2op<bit u, bits<4> opcode, string asmop> {
5002 // vector register class for element is always 128-bit to cover the max index
5003 def _2d2s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s",
5004 neon_uimm2_bare, VPR128, VPR64, VPR128> {
5005 let Inst{11} = {Index{1}};
5006 let Inst{21} = {Index{0}};
5007 let Inst{20-16} = Re;
5010 def _2d4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s",
5011 neon_uimm2_bare, VPR128, VPR128, VPR128> {
5012 let Inst{11} = {Index{1}};
5013 let Inst{21} = {Index{0}};
5014 let Inst{20-16} = Re;
5017 // Index operations on 16-bit(H) elements are restricted to using v0-v15.
5018 def _4s8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h",
5019 neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
5020 let Inst{11} = {Index{2}};
5021 let Inst{21} = {Index{1}};
5022 let Inst{20} = {Index{0}};
5023 let Inst{19-16} = Re{3-0};
5026 def _4s4h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h",
5027 neon_uimm3_bare, VPR128, VPR64, VPR128Lo> {
5028 let Inst{11} = {Index{2}};
5029 let Inst{21} = {Index{1}};
5030 let Inst{20} = {Index{0}};
5031 let Inst{19-16} = Re{3-0};
// Widening multiplies by element.
5035 defm SMULLve : NI_2VE_v3_2op<0b0, 0b1010, "smull">;
5036 defm UMULLve : NI_2VE_v3_2op<0b1, 0b1010, "umull">;
5037 defm SQDMULLve : NI_2VE_v3_2op<0b0, 0b1011, "sqdmull">;
5039 // Pattern for lane in 128-bit vector
// Pattern classes for the "2" (high-half) accumulating long ops: hiop
// extracts the high half of Rn (e.g. Neon_High8H) before the widening op.
5040 class NI_2VEL2_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
5041 RegisterOperand EleOpVPR, ValueType ResTy,
5042 ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
5043 SDPatternOperator hiop, SDPatternOperator coreop>
5044 : Pat<(ResTy (op (ResTy VPR128:$src),
5045 (HalfOpTy (hiop (OpTy VPR128:$Rn))),
5046 (HalfOpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
5047 (INST VPR128:$src, VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
5049 // Pattern for lane in 64-bit vector
// As above with a 64-bit element register, widened via SUBREG_TO_REG.
5050 class NI_2VEL2_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
5051 RegisterOperand EleOpVPR, ValueType ResTy,
5052 ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
5053 SDPatternOperator hiop, SDPatternOperator coreop>
5054 : Pat<(ResTy (op (ResTy VPR128:$src),
5055 (HalfOpTy (hiop (OpTy VPR128:$Rn))),
5056 (HalfOpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
5057 (INST VPR128:$src, VPR128:$Rn,
5058 (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
// Selection patterns for the accumulating long by-element ops: low-half
// forms use the generic NI_2VE classes, high-half ("2") forms use NI_2VEL2.
5060 multiclass NI_2VEL_v3_pat<string subop, SDPatternOperator op> {
5061 def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
5062 op, VPR128, VPR64, VPR128Lo, v4i32, v4i16, v8i16,
5063 BinOpFrag<(Neon_vduplane
5064 (Neon_low8H node:$LHS), node:$RHS)>>;
5066 def : NI_2VE_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
5067 op, VPR128, VPR64, VPR128, v2i64, v2i32, v4i32,
5068 BinOpFrag<(Neon_vduplane
5069 (Neon_low4S node:$LHS), node:$RHS)>>;
5071 def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
5072 op, VPR128Lo, v4i32, v8i16, v8i16, v4i16, Neon_High8H,
5073 BinOpFrag<(Neon_vduplane
5074 (Neon_low8H node:$LHS), node:$RHS)>>;
5076 def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
5077 op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S,
5078 BinOpFrag<(Neon_vduplane
5079 (Neon_low4S node:$LHS), node:$RHS)>>;
5081 // Index can only be half of the max value for lane in 64-bit vector
5083 def : NI_2VE_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
5084 op, VPR128, VPR64, VPR64Lo, v4i32, v4i16, v4i16,
5085 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5087 def : NI_2VE_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
5088 op, VPR128, VPR64, VPR64, v2i64, v2i32, v2i32,
5089 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5091 def : NI_2VEL2_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
5092 op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H,
5093 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5095 def : NI_2VEL2_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
5096 op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S,
5097 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
// Hook the SMLAL/UMLAL/SMLSL/UMLSL instructions to their DAG fragments.
5100 defm SMLAL_lane_v3 : NI_2VEL_v3_pat<"SMLALvve", Neon_smlal>;
5101 defm UMLAL_lane_v3 : NI_2VEL_v3_pat<"UMLALvve", Neon_umlal>;
5102 defm SMLSL_lane_v3 : NI_2VEL_v3_pat<"SMLSLvve", Neon_smlsl>;
5103 defm UMLSL_lane_v3 : NI_2VEL_v3_pat<"UMLSLvve", Neon_umlsl>;
5105 // Pattern for lane in 128-bit vector
// Two-operand (non-accumulating) analogues of NI_2VEL2_laneq/lane for the
// high-half widening multiplies.
// NOTE(review): the Pat head lines (original 5110 and 5120) are not visible
// in this view.
5106 class NI_2VEL2_mul_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
5107 RegisterOperand EleOpVPR, ValueType ResTy,
5108 ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
5109 SDPatternOperator hiop, SDPatternOperator coreop>
5111 (HalfOpTy (hiop (OpTy VPR128:$Rn))),
5112 (HalfOpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
5113 (INST VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
5115 // Pattern for lane in 64-bit vector
5116 class NI_2VEL2_mul_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
5117 RegisterOperand EleOpVPR, ValueType ResTy,
5118 ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
5119 SDPatternOperator hiop, SDPatternOperator coreop>
5121 (HalfOpTy (hiop (OpTy VPR128:$Rn))),
5122 (HalfOpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
5124 (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
// Lane-indexed selection patterns for a widening multiply (no accumulator).
// The laneq variants index into a 128-bit element vector (taking its low half
// via Neon_low8H/Neon_low4S before the duplane); the lane variants index into
// a 64-bit element vector, where the lane immediate range is halved.
5126 multiclass NI_2VEL_mul_v3_pat<string subop, SDPatternOperator op> {
5127 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
5128 op, VPR64, VPR128Lo, v4i32, v4i16, v8i16,
5129 BinOpFrag<(Neon_vduplane
5130 (Neon_low8H node:$LHS), node:$RHS)>>;
5132 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
5133 op, VPR64, VPR128, v2i64, v2i32, v4i32,
5134 BinOpFrag<(Neon_vduplane
5135 (Neon_low4S node:$LHS), node:$RHS)>>;
5137 def : NI_2VEL2_mul_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
5138 op, VPR128Lo, v4i32, v8i16, v8i16, v4i16,
5140 BinOpFrag<(Neon_vduplane
5141 (Neon_low8H node:$LHS), node:$RHS)>>;
5143 def : NI_2VEL2_mul_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
5144 op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S,
5145 BinOpFrag<(Neon_vduplane
5146 (Neon_low4S node:$LHS), node:$RHS)>>;
5148 // Index can only be half of the max value for lane in 64-bit vector
5150 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
5151 op, VPR64, VPR64Lo, v4i32, v4i16, v4i16,
5152 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5154 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
5155 op, VPR64, VPR64, v2i64, v2i32, v2i32,
5156 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5158 def : NI_2VEL2_mul_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
5159 op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H,
5160 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5162 def : NI_2VEL2_mul_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
5163 op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S,
5164 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
// Lane-indexed widening multiplies mapped to the ARM NEON mull intrinsics.
5167 defm SMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SMULLve", int_arm_neon_vmulls>;
5168 defm UMULL_lane_v3 : NI_2VEL_mul_v3_pat<"UMULLve", int_arm_neon_vmullu>;
5169 defm SQDMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SQDMULLve", int_arm_neon_vqdmull>;
// PatFrags combining a saturating doubling multiply-long (vqdmull) with a
// saturating accumulate `op` (vqadds for MLA, vqsubs for MLS), in 4s and 2d
// result widths.
5171 multiclass NI_qdma<SDPatternOperator op> {
5172 def _4s : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
5174 (v4i32 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>;
5176 def _2d : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
5178 (v2i64 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>;
5181 defm Neon_qdmlal : NI_qdma<int_arm_neon_vqadds>;
5182 defm Neon_qdmlsl : NI_qdma<int_arm_neon_vqsubs>;
// Lane-indexed patterns for the saturating doubling multiply-accumulate-long
// family (SQDMLAL/SQDMLSL). `op` names the Neon_qdmlal/Neon_qdmlsl PatFrag
// prefix; "_4s"/"_2d" select the result width. Structure parallels
// NI_2VEL_mul_v3_pat: laneq variants for 128-bit element vectors, lane
// variants (halved immediate range) for 64-bit element vectors.
5184 multiclass NI_2VEL_v3_qdma_pat<string subop, string op> {
5185 def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
5186 !cast<PatFrag>(op # "_4s"), VPR128, VPR64, VPR128Lo,
5187 v4i32, v4i16, v8i16,
5188 BinOpFrag<(Neon_vduplane
5189 (Neon_low8H node:$LHS), node:$RHS)>>;
5191 def : NI_2VE_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
5192 !cast<PatFrag>(op # "_2d"), VPR128, VPR64, VPR128,
5193 v2i64, v2i32, v4i32,
5194 BinOpFrag<(Neon_vduplane
5195 (Neon_low4S node:$LHS), node:$RHS)>>;
5197 def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
5198 !cast<PatFrag>(op # "_4s"), VPR128Lo,
5199 v4i32, v8i16, v8i16, v4i16, Neon_High8H,
5200 BinOpFrag<(Neon_vduplane
5201 (Neon_low8H node:$LHS), node:$RHS)>>;
5203 def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
5204 !cast<PatFrag>(op # "_2d"), VPR128,
5205 v2i64, v4i32, v4i32, v2i32, Neon_High4S,
5206 BinOpFrag<(Neon_vduplane
5207 (Neon_low4S node:$LHS), node:$RHS)>>;
5209 // Index can only be half of the max value for lane in 64-bit vector
5211 def : NI_2VE_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
5212 !cast<PatFrag>(op # "_4s"), VPR128, VPR64, VPR64Lo,
5213 v4i32, v4i16, v4i16,
5214 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5216 def : NI_2VE_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
5217 !cast<PatFrag>(op # "_2d"), VPR128, VPR64, VPR64,
5218 v2i64, v2i32, v2i32,
5219 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5221 def : NI_2VEL2_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
5222 !cast<PatFrag>(op # "_4s"), VPR64Lo,
5223 v4i32, v8i16, v4i16, v4i16, Neon_High8H,
5224 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5226 def : NI_2VEL2_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
5227 !cast<PatFrag>(op # "_2d"), VPR64,
5228 v2i64, v4i32, v2i32, v2i32, Neon_High4S,
5229 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
// Instantiations: saturating doubling multiply-accumulate/subtract long.
5232 defm SQDMLAL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLALvve", "Neon_qdmlal">;
5233 defm SQDMLSL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLSLvve", "Neon_qdmlsl">;
5235 // End of implementation for instruction class (3V Elem)
5237 // Insert element (vector, from main)
// INS Vd.<T>[index], Rn — insert a general-purpose register into a vector
// lane. Inst{20-16} encodes both the element size (position of the trailing
// set bit) and the lane index (the leading Imm bits).
5238 def INSbw : NeonI_INS_main<"ins", "b", v16i8, GPR32, i32,
5240 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
5242 def INShw : NeonI_INS_main<"ins", "h", v8i16, GPR32, i32,
5244 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
5246 def INSsw : NeonI_INS_main<"ins", "s", v4i32, GPR32, i32,
5248 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
5250 def INSdx : NeonI_INS_main<"ins", "d", v2i64, GPR64, i64,
5252 let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
// Selects vector_insert on a 64-bit vector: widen $src to 128 bits with
// SUBREG_TO_REG, perform the 128-bit INS, then take the low 64 bits back
// with EXTRACT_SUBREG.
5255 class Neon_INS_main_pattern <ValueType ResTy,ValueType ExtResTy,
5256 RegisterClass OpGPR, ValueType OpTy,
5257 Operand OpImm, Instruction INS>
5258 : Pat<(ResTy (vector_insert
5262 (ResTy (EXTRACT_SUBREG
5263 (ExtResTy (INS (ExtResTy (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
5264 OpGPR:$Rn, OpImm:$Imm)), sub_64))>;
// 64-bit-vector insert patterns for each element size.
5266 def INSbw_pattern : Neon_INS_main_pattern<v8i8, v16i8, GPR32, i32,
5267 neon_uimm3_bare, INSbw>;
5268 def INShw_pattern : Neon_INS_main_pattern<v4i16, v8i16, GPR32, i32,
5269 neon_uimm2_bare, INShw>;
5270 def INSsw_pattern : Neon_INS_main_pattern<v2i32, v4i32, GPR32, i32,
5271 neon_uimm1_bare, INSsw>;
5272 def INSdx_pattern : Neon_INS_main_pattern<v1i64, v2i64, GPR64, i64,
5273 neon_uimm0_bare, INSdx>;
// INS Vd.<T>[Immd], Vn.<T>[Immn] — element-to-element insert. $src is tied
// to $Rd because the instruction modifies only one destination lane.
5275 class NeonI_INS_element<string asmop, string Res, Operand ResImm>
5276 : NeonI_insert<0b1, 0b1,
5277 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn,
5278 ResImm:$Immd, ResImm:$Immn),
5279 asmop # "\t$Rd." # Res # "[$Immd], $Rn." # Res # "[$Immn]",
5282 let Constraints = "$src = $Rd";
5287 // Insert element (vector, from element)
// Inst{20-16} encodes element size + destination lane (Immd); Inst{14-11}
// carries the source lane (Immn), with low bits unspecified for wider
// element sizes.
5288 def INSELb : NeonI_INS_element<"ins", "b", neon_uimm4_bare> {
5289 let Inst{20-16} = {Immd{3}, Immd{2}, Immd{1}, Immd{0}, 0b1};
5290 let Inst{14-11} = {Immn{3}, Immn{2}, Immn{1}, Immn{0}};
5292 def INSELh : NeonI_INS_element<"ins", "h", neon_uimm3_bare> {
5293 let Inst{20-16} = {Immd{2}, Immd{1}, Immd{0}, 0b1, 0b0};
5294 let Inst{14-12} = {Immn{2}, Immn{1}, Immn{0}};
5295 // bit 11 is unspecified.
5297 def INSELs : NeonI_INS_element<"ins", "s", neon_uimm2_bare> {
5298 let Inst{20-16} = {Immd{1}, Immd{0}, 0b1, 0b0, 0b0};
5299 let Inst{14-13} = {Immn{1}, Immn{0}};
5300 // bits 11-12 are unspecified.
5302 def INSELd : NeonI_INS_element<"ins", "d", neon_uimm1_bare> {
5303 let Inst{20-16} = {Immd, 0b1, 0b0, 0b0, 0b0};
5304 let Inst{14} = Immn{0};
5305 // bits 11-13 are unspecified.
// Four patterns covering every 64/128-bit combination of
// insert-from-extract: 128<-128, 128<-64, 64<-128, 64<-64. The 64-bit sides
// are widened with SUBREG_TO_REG before the 128-bit INSEL and narrowed back
// with EXTRACT_SUBREG afterwards.
5308 multiclass Neon_INS_elt_pattern<ValueType ResTy, ValueType NaTy,
5309 ValueType MidTy, Operand StImm, Operand NaImm,
5311 def : Pat<(ResTy (vector_insert
5312 (ResTy VPR128:$src),
5313 (MidTy (vector_extract
5317 (INS (ResTy VPR128:$src), (ResTy VPR128:$Rn),
5318 StImm:$Immd, StImm:$Immn)>;
5320 def : Pat <(ResTy (vector_insert
5321 (ResTy VPR128:$src),
5322 (MidTy (vector_extract
5326 (INS (ResTy VPR128:$src),
5327 (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$Rn), sub_64)),
5328 StImm:$Immd, NaImm:$Immn)>;
5330 def : Pat <(NaTy (vector_insert
5332 (MidTy (vector_extract
5336 (NaTy (EXTRACT_SUBREG
5338 (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)),
5340 NaImm:$Immd, StImm:$Immn)),
5343 def : Pat <(NaTy (vector_insert
5345 (MidTy (vector_extract
5349 (NaTy (EXTRACT_SUBREG
5351 (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)),
5352 (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$Rn), sub_64)),
5353 NaImm:$Immd, NaImm:$Immn)),
// Instantiations per element type; StImm/NaImm give the 128-bit and 64-bit
// lane-index immediate ranges respectively.
5357 defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, neon_uimm2_bare,
5358 neon_uimm1_bare, INSELs>;
5359 defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, neon_uimm1_bare,
5360 neon_uimm0_bare, INSELd>;
5361 defm : Neon_INS_elt_pattern<v16i8, v8i8, i32, neon_uimm4_bare,
5362 neon_uimm3_bare, INSELb>;
5363 defm : Neon_INS_elt_pattern<v8i16, v4i16, i32, neon_uimm3_bare,
5364 neon_uimm2_bare, INSELh>;
5365 defm : Neon_INS_elt_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
5366 neon_uimm1_bare, INSELs>;
5367 defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, neon_uimm1_bare,
5368 neon_uimm0_bare, INSELd>;
// Patterns for inserting a scalar FP register (FPR32/FPR64) into a vector
// lane: the scalar is widened into a 128-bit register via SUBREG_TO_REG so
// the element-to-element INSEL form can be reused.
5370 multiclass Neon_INS_elt_float_pattern<ValueType ResTy, ValueType NaTy,
5372 RegisterClass OpFPR, Operand ResImm,
5373 SubRegIndex SubIndex, Instruction INS> {
5374 def : Pat <(ResTy (vector_insert
5375 (ResTy VPR128:$src),
5378 (INS (ResTy VPR128:$src),
5379 (ResTy (SUBREG_TO_REG (i64 0), OpFPR:$Rn, SubIndex)),
5383 def : Pat <(NaTy (vector_insert
5387 (NaTy (EXTRACT_SUBREG
5389 (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)),
5390 (ResTy (SUBREG_TO_REG (i64 0), (MidTy OpFPR:$Rn), SubIndex)),
5396 defm : Neon_INS_elt_float_pattern<v4f32, v2f32, f32, FPR32, neon_uimm2_bare,
5398 defm : Neon_INS_elt_float_pattern<v2f64, v1f64, f64, FPR64, neon_uimm1_bare,
// SMOV Rd, Vn.<T>[index] — sign-extending move of a vector element to a
// general-purpose register. Q selects the 32-bit (w) or 64-bit (x)
// destination form.
5401 class NeonI_SMOV<string asmop, string Res, bit Q,
5402 ValueType OpTy, ValueType eleTy,
5403 Operand OpImm, RegisterClass ResGPR, ValueType ResTy>
5404 : NeonI_copy<Q, 0b0, 0b0101,
5405 (outs ResGPR:$Rd), (ins VPR128:$Rn, OpImm:$Imm),
5406 asmop # "\t$Rd, $Rn." # Res # "[$Imm]",
5407 [(set (ResTy ResGPR:$Rd),
5409 (ResTy (vector_extract
5410 (OpTy VPR128:$Rn), (OpImm:$Imm))),
5416 // Signed integer move (main, from element)
// Inst{20-16} encodes element size + lane index, as for INS above.
5417 def SMOVwb : NeonI_SMOV<"smov", "b", 0b0, v16i8, i8, neon_uimm4_bare,
5419 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
5421 def SMOVwh : NeonI_SMOV<"smov", "h", 0b0, v8i16, i16, neon_uimm3_bare,
5423 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
5425 def SMOVxb : NeonI_SMOV<"smov", "b", 0b1, v16i8, i8, neon_uimm4_bare,
5427 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
5429 def SMOVxh : NeonI_SMOV<"smov", "h", 0b1, v8i16, i16, neon_uimm3_bare,
5431 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
5433 def SMOVxs : NeonI_SMOV<"smov", "s", 0b1, v4i32, i32, neon_uimm2_bare,
5435 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
// Patterns matching sign-extension of an extracted element (sext and
// sext_inreg forms, from both 128-bit and 64-bit source vectors) onto the
// 64-bit SMOVx instructions. 64-bit vectors are widened with SUBREG_TO_REG.
5438 multiclass Neon_SMOVx_pattern <ValueType StTy, ValueType NaTy,
5439 ValueType eleTy, Operand StImm, Operand NaImm,
5440 Instruction SMOVI> {
5441 def : Pat<(i64 (sext_inreg
5443 (i32 (vector_extract
5444 (StTy VPR128:$Rn), (StImm:$Imm))))),
5446 (SMOVI VPR128:$Rn, StImm:$Imm)>;
5448 def : Pat<(i64 (sext
5449 (i32 (vector_extract
5450 (StTy VPR128:$Rn), (StImm:$Imm))))),
5451 (SMOVI VPR128:$Rn, StImm:$Imm)>;
5453 def : Pat<(i64 (sext_inreg
5454 (i64 (vector_extract
5455 (NaTy VPR64:$Rn), (NaImm:$Imm))),
5457 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
5460 def : Pat<(i64 (sext_inreg
5462 (i32 (vector_extract
5463 (NaTy VPR64:$Rn), (NaImm:$Imm))))),
5465 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
5468 def : Pat<(i64 (sext
5469 (i32 (vector_extract
5470 (NaTy VPR64:$Rn), (NaImm:$Imm))))),
5471 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
5475 defm : Neon_SMOVx_pattern<v16i8, v8i8, i8, neon_uimm4_bare,
5476 neon_uimm3_bare, SMOVxb>;
5477 defm : Neon_SMOVx_pattern<v8i16, v4i16, i16, neon_uimm3_bare,
5478 neon_uimm2_bare, SMOVxh>;
5479 defm : Neon_SMOVx_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
5480 neon_uimm1_bare, SMOVxs>;
// 32-bit counterpart: sign-extend-in-register of an element extracted from a
// 64-bit vector, selected onto the SMOVw forms after widening the source.
5482 class Neon_SMOVw_pattern <ValueType StTy, ValueType NaTy,
5483 ValueType eleTy, Operand StImm, Operand NaImm,
5485 : Pat<(i32 (sext_inreg
5486 (i32 (vector_extract
5487 (NaTy VPR64:$Rn), (NaImm:$Imm))),
5489 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
5492 def : Neon_SMOVw_pattern<v16i8, v8i8, i8, neon_uimm4_bare,
5493 neon_uimm3_bare, SMOVwb>;
5494 def : Neon_SMOVw_pattern<v8i16, v4i16, i16, neon_uimm3_bare,
5495 neon_uimm2_bare, SMOVwh>;
// UMOV Rd, Vn.<T>[index] — zero-extending move of a vector element to a
// general-purpose register (opcode 0b0111 vs. SMOV's 0b0101).
5497 class NeonI_UMOV<string asmop, string Res, bit Q,
5498 ValueType OpTy, Operand OpImm,
5499 RegisterClass ResGPR, ValueType ResTy>
5500 : NeonI_copy<Q, 0b0, 0b0111,
5501 (outs ResGPR:$Rd), (ins VPR128:$Rn, OpImm:$Imm),
5502 asmop # "\t$Rd, $Rn." # Res # "[$Imm]",
5503 [(set (ResTy ResGPR:$Rd),
5504 (ResTy (vector_extract
5505 (OpTy VPR128:$Rn), (OpImm:$Imm))))],
5510 // Unsigned integer move (main, from element)
5511 def UMOVwb : NeonI_UMOV<"umov", "b", 0b0, v16i8, neon_uimm4_bare,
5513 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
5515 def UMOVwh : NeonI_UMOV<"umov", "h", 0b0, v8i16, neon_uimm3_bare,
5517 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
5519 def UMOVws : NeonI_UMOV<"umov", "s", 0b0, v4i32, neon_uimm2_bare,
5521 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
5523 def UMOVxd : NeonI_UMOV<"umov", "d", 0b1, v2i64, neon_uimm1_bare,
5525 let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
// Extract from a 64-bit vector via the 128-bit UMOV form, widening the
// source with SUBREG_TO_REG first.
// NOTE(review): the instruction parameter is named SMOVI although every
// instantiation below passes a UMOV instruction — naming carried over from
// Neon_SMOVw_pattern.
5528 class Neon_UMOV_pattern <ValueType StTy, ValueType NaTy, ValueType ResTy,
5529 Operand StImm, Operand NaImm,
5531 : Pat<(ResTy (vector_extract
5532 (NaTy VPR64:$Rn), NaImm:$Imm)),
5533 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
5536 def : Neon_UMOV_pattern<v16i8, v8i8, i32, neon_uimm4_bare,
5537 neon_uimm3_bare, UMOVwb>;
5538 def : Neon_UMOV_pattern<v8i16, v4i16, i32, neon_uimm3_bare,
5539 neon_uimm2_bare, UMOVwh>;
5540 def : Neon_UMOV_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
5541 neon_uimm1_bare, UMOVws>;
// Fold explicit zero-extensions of extracted elements directly into UMOV
// (UMOV already zero-extends). Covers byte/halfword extracts from 128-bit
// and 64-bit vectors, and i64 extracts from v2i64/v1i64.
5544 (i32 (vector_extract
5545 (v16i8 VPR128:$Rn), (neon_uimm4_bare:$Imm))),
5547 (UMOVwb VPR128:$Rn, neon_uimm4_bare:$Imm)>;
5550 (i32 (vector_extract
5551 (v8i16 VPR128:$Rn), (neon_uimm3_bare:$Imm))),
5553 (UMOVwh VPR128:$Rn, neon_uimm3_bare:$Imm)>;
5555 def : Pat<(i64 (zext
5556 (i32 (vector_extract
5557 (v2i64 VPR128:$Rn), (neon_uimm1_bare:$Imm))))),
5558 (UMOVxd VPR128:$Rn, neon_uimm1_bare:$Imm)>;
5561 (i32 (vector_extract
5562 (v8i8 VPR64:$Rn), (neon_uimm3_bare:$Imm))),
5564 (UMOVwb (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
5565 neon_uimm3_bare:$Imm)>;
5568 (i32 (vector_extract
5569 (v4i16 VPR64:$Rn), (neon_uimm2_bare:$Imm))),
5571 (UMOVwh (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
5572 neon_uimm2_bare:$Imm)>;
5574 def : Pat<(i64 (zext
5575 (i32 (vector_extract
5576 (v1i64 VPR64:$Rn), (neon_uimm0_bare:$Imm))))),
5577 (UMOVxd (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
5578 neon_uimm0_bare:$Imm)>;
5580 // Additional copy patterns for scalar types
// Extracting element 0 of a one-element vector is a plain register move:
// b/h go through a subregister widen, s/d map to FMOV to GPR, and the FP
// cases (not fully visible here) stay in the FP register file.
5581 def : Pat<(i32 (vector_extract (v1i8 FPR8:$Rn), (i64 0))),
5583 (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8)), (i64 0))>;
5585 def : Pat<(i32 (vector_extract (v1i16 FPR16:$Rn), (i64 0))),
5587 (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16)), (i64 0))>;
5589 def : Pat<(i32 (vector_extract (v1i32 FPR32:$Rn), (i64 0))),
5590 (FMOVws FPR32:$Rn)>;
5592 def : Pat<(i64 (vector_extract (v1i64 FPR64:$Rn), (i64 0))),
5593 (FMOVxd FPR64:$Rn)>;
5595 def : Pat<(f64 (vector_extract (v1f64 FPR64:$Rn), (i64 0))),
5598 def : Pat<(f32 (vector_extract (v1f32 FPR32:$Rn), (i64 0))),
// scalar_to_vector: build a one-element vector from a scalar. Integer cases
// insert into lane 0 of a wider IMPLICIT_DEF vector then extract the narrow
// subregister; the v2f64 case inserts the f64 into the low subregister.
5601 def : Pat<(v1i8 (scalar_to_vector GPR32:$Rn)),
5602 (v1i8 (EXTRACT_SUBREG (v16i8
5603 (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))),
5606 def : Pat<(v1i16 (scalar_to_vector GPR32:$Rn)),
5607 (v1i16 (EXTRACT_SUBREG (v8i16
5608 (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))),
5611 def : Pat<(v1i32 (scalar_to_vector GPR32:$src)),
5614 def : Pat<(v1i64 (scalar_to_vector GPR64:$src)),
5617 def : Pat<(v1f32 (scalar_to_vector (f32 FPR32:$Rn))),
5619 def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Rn))),
5622 def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$src))),
5625 def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$src))),
5626 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)),
5627 (f64 FPR64:$src), sub_64)>;
// DUP Vd.<T>, Vn.<T>[index] — broadcast one vector lane to all lanes.
// Q selects 128-bit (16b/8h/4s/2d) vs. 64-bit (8b/4h/2s) destinations;
// Inst{20-16} encodes element size + lane index as for INS/SMOV/UMOV.
5629 class NeonI_DUP_Elt<bit Q, string asmop, string rdlane, string rnlane,
5630 RegisterOperand ResVPR, Operand OpImm>
5631 : NeonI_copy<Q, 0b0, 0b0000, (outs ResVPR:$Rd),
5632 (ins VPR128:$Rn, OpImm:$Imm),
5633 asmop # "\t$Rd" # rdlane # ", $Rn" # rnlane # "[$Imm]",
5639 def DUPELT16b : NeonI_DUP_Elt<0b1, "dup", ".16b", ".b", VPR128,
5641 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
5644 def DUPELT8h : NeonI_DUP_Elt<0b1, "dup", ".8h", ".h", VPR128,
5646 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
5649 def DUPELT4s : NeonI_DUP_Elt<0b1, "dup", ".4s", ".s", VPR128,
5651 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
5654 def DUPELT2d : NeonI_DUP_Elt<0b1, "dup", ".2d", ".d", VPR128,
5656 let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
5659 def DUPELT8b : NeonI_DUP_Elt<0b0, "dup", ".8b", ".b", VPR64,
5661 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
5664 def DUPELT4h : NeonI_DUP_Elt<0b0, "dup", ".4h", ".h", VPR64,
5666 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
5669 def DUPELT2s : NeonI_DUP_Elt<0b0, "dup", ".2s", ".s", VPR64,
5671 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
// Select Neon_vduplane onto the DUPELT instructions. The first pattern
// handles a 128-bit source directly; the second widens a 64-bit source with
// SUBREG_TO_REG (using the narrower OpNImm lane-index range).
5674 multiclass NeonI_DUP_Elt_pattern<Instruction DUPELT, ValueType ResTy,
5675 ValueType OpTy,ValueType NaTy,
5676 ValueType ExTy, Operand OpLImm,
5678 def : Pat<(ResTy (Neon_vduplane (OpTy VPR128:$Rn), OpLImm:$Imm)),
5679 (ResTy (DUPELT (OpTy VPR128:$Rn), OpLImm:$Imm))>;
5681 def : Pat<(ResTy (Neon_vduplane
5682 (NaTy VPR64:$Rn), OpNImm:$Imm)),
5684 (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), OpNImm:$Imm))>;
// Instantiations for every integer and FP element type/width combination.
5686 defm : NeonI_DUP_Elt_pattern<DUPELT16b, v16i8, v16i8, v8i8, v16i8,
5687 neon_uimm4_bare, neon_uimm3_bare>;
5688 defm : NeonI_DUP_Elt_pattern<DUPELT8b, v8i8, v16i8, v8i8, v16i8,
5689 neon_uimm4_bare, neon_uimm3_bare>;
5690 defm : NeonI_DUP_Elt_pattern<DUPELT8h, v8i16, v8i16, v4i16, v8i16,
5691 neon_uimm3_bare, neon_uimm2_bare>;
5692 defm : NeonI_DUP_Elt_pattern<DUPELT4h, v4i16, v8i16, v4i16, v8i16,
5693 neon_uimm3_bare, neon_uimm2_bare>;
5694 defm : NeonI_DUP_Elt_pattern<DUPELT4s, v4i32, v4i32, v2i32, v4i32,
5695 neon_uimm2_bare, neon_uimm1_bare>;
5696 defm : NeonI_DUP_Elt_pattern<DUPELT2s, v2i32, v4i32, v2i32, v4i32,
5697 neon_uimm2_bare, neon_uimm1_bare>;
5698 defm : NeonI_DUP_Elt_pattern<DUPELT2d, v2i64, v2i64, v1i64, v2i64,
5699 neon_uimm1_bare, neon_uimm0_bare>;
5700 defm : NeonI_DUP_Elt_pattern<DUPELT4s, v4f32, v4f32, v2f32, v4f32,
5701 neon_uimm2_bare, neon_uimm1_bare>;
5702 defm : NeonI_DUP_Elt_pattern<DUPELT2s, v2f32, v4f32, v2f32, v4f32,
5703 neon_uimm2_bare, neon_uimm1_bare>;
5704 defm : NeonI_DUP_Elt_pattern<DUPELT2d, v2f64, v2f64, v1f64, v2f64,
5705 neon_uimm1_bare, neon_uimm0_bare>;
// Broadcast an FP scalar register to all lanes: widen the FPR into a vector
// register with SUBREG_TO_REG, then duplicate lane 0 (the DUPELT operand and
// lane index lines are elided in this excerpt).
5707 def : Pat<(v2f32 (Neon_vdup (f32 FPR32:$Rn))),
5709 (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
5711 def : Pat<(v4f32 (Neon_vdup (f32 FPR32:$Rn))),
5713 (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
5715 def : Pat<(v2f64 (Neon_vdup (f64 FPR64:$Rn))),
5717 (SUBREG_TO_REG (i64 0), FPR64:$Rn, sub_64),
// DUP Vd.<T>, Rn — broadcast a general-purpose register to all vector
// lanes, matched from the Neon_vdup node. Inst{19-16} (partially
// unspecified per element size) encodes the element size.
5720 class NeonI_DUP<bit Q, string asmop, string rdlane,
5721 RegisterOperand ResVPR, ValueType ResTy,
5722 RegisterClass OpGPR, ValueType OpTy>
5723 : NeonI_copy<Q, 0b0, 0b0001, (outs ResVPR:$Rd), (ins OpGPR:$Rn),
5724 asmop # "\t$Rd" # rdlane # ", $Rn",
5725 [(set (ResTy ResVPR:$Rd),
5726 (ResTy (Neon_vdup (OpTy OpGPR:$Rn))))],
5729 def DUP16b : NeonI_DUP<0b1, "dup", ".16b", VPR128, v16i8, GPR32, i32> {
5731 // bits 17-19 are unspecified.
5734 def DUP8h : NeonI_DUP<0b1, "dup", ".8h", VPR128, v8i16, GPR32, i32> {
5735 let Inst{17-16} = 0b10;
5736 // bits 18-19 are unspecified.
5739 def DUP4s : NeonI_DUP<0b1, "dup", ".4s", VPR128, v4i32, GPR32, i32> {
5740 let Inst{18-16} = 0b100;
5741 // bit 19 is unspecified.
5744 def DUP2d : NeonI_DUP<0b1, "dup", ".2d", VPR128, v2i64, GPR64, i64> {
5745 let Inst{19-16} = 0b1000;
5748 def DUP8b : NeonI_DUP<0b0, "dup", ".8b", VPR64, v8i8, GPR32, i32> {
5750 // bits 17-19 are unspecified.
5753 def DUP4h : NeonI_DUP<0b0, "dup", ".4h", VPR64, v4i16, GPR32, i32> {
5754 let Inst{17-16} = 0b10;
5755 // bits 18-19 are unspecified.
5758 def DUP2s : NeonI_DUP<0b0, "dup", ".2s", VPR64, v2i32, GPR32, i32> {
5759 let Inst{18-16} = 0b100;
5760 // bit 19 is unspecified.
5763 // patterns for CONCAT_VECTORS
// Three cases: concat with undef is a plain subregister widen; concat of two
// distinct 64-bit vectors widens both and combines them (combining
// instruction line elided here); concat of a vector with itself reuses one
// widened operand.
5764 multiclass Concat_Vector_Pattern<ValueType ResTy, ValueType OpTy> {
5765 def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), undef)),
5766 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)>;
5767 def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))),
5769 (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
5770 (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rm, sub_64)),
5773 def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), (OpTy VPR64:$Rn))),
5775 (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
5779 defm : Concat_Vector_Pattern<v16i8, v8i8>;
5780 defm : Concat_Vector_Pattern<v8i16, v4i16>;
5781 defm : Concat_Vector_Pattern<v4i32, v2i32>;
5782 defm : Concat_Vector_Pattern<v2i64, v1i64>;
5783 defm : Concat_Vector_Pattern<v4f32, v2f32>;
5784 defm : Concat_Vector_Pattern<v2f64, v1f64>;
5786 // patterns for EXTRACT_SUBVECTOR
// Extracting the low half (index 0) of a 128-bit vector is just a read of
// the sub_64 subregister — no instruction is emitted.
5787 def : Pat<(v8i8 (extract_subvector (v16i8 VPR128:$Rn), (i64 0))),
5788 (v8i8 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
5789 def : Pat<(v4i16 (extract_subvector (v8i16 VPR128:$Rn), (i64 0))),
5790 (v4i16 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
5791 def : Pat<(v2i32 (extract_subvector (v4i32 VPR128:$Rn), (i64 0))),
5792 (v2i32 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
5793 def : Pat<(v1i64 (extract_subvector (v2i64 VPR128:$Rn), (i64 0))),
5794 (v1i64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
5795 def : Pat<(v2f32 (extract_subvector (v4f32 VPR128:$Rn), (i64 0))),
5796 (v2f32 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
5797 def : Pat<(v1f64 (extract_subvector (v2f64 VPR128:$Rn), (i64 0))),
5798 (v1f64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
// Two-operand AES instructions (AESE/AESD): destructive on $Rd, so $src is
// tied to $Rd; selected from the shared ARM NEON AES intrinsics.
5801 class NeonI_Cryptoaes_2v<bits<2> size, bits<5> opcode,
5802 string asmop, SDPatternOperator opnode>
5803 : NeonI_Crypto_AES<size, opcode,
5804 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
5805 asmop # "\t$Rd.16b, $Rn.16b",
5806 [(set (v16i8 VPR128:$Rd),
5807 (v16i8 (opnode (v16i8 VPR128:$src),
5808 (v16i8 VPR128:$Rn))))],
5810 let Constraints = "$src = $Rd";
5813 def AESE : NeonI_Cryptoaes_2v<0b00, 0b00100, "aese", int_arm_neon_aese>;
5814 def AESD : NeonI_Cryptoaes_2v<0b00, 0b00101, "aesd", int_arm_neon_aesd>;
// One-operand AES mix-columns instructions (AESMC/AESIMC): pure function of
// $Rn, no tied source operand.
5816 class NeonI_Cryptoaes<bits<2> size, bits<5> opcode,
5817 string asmop, SDPatternOperator opnode>
5818 : NeonI_Crypto_AES<size, opcode,
5819 (outs VPR128:$Rd), (ins VPR128:$Rn),
5820 asmop # "\t$Rd.16b, $Rn.16b",
5821 [(set (v16i8 VPR128:$Rd),
5822 (v16i8 (opnode (v16i8 VPR128:$Rn))))],
5825 def AESMC : NeonI_Cryptoaes<0b00, 0b00110, "aesmc", int_arm_neon_aesmc>;
5826 def AESIMC : NeonI_Cryptoaes<0b00, 0b00111, "aesimc", int_arm_neon_aesimc>;
// Two-vector SHA schedule-update instructions (SHA1SU1/SHA256SU0):
// accumulate into $Rd (tied to $src), operating on .4s vectors.
5828 class NeonI_Cryptosha_vv<bits<2> size, bits<5> opcode,
5829 string asmop, SDPatternOperator opnode>
5830 : NeonI_Crypto_SHA<size, opcode,
5831 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
5832 asmop # "\t$Rd.4s, $Rn.4s",
5833 [(set (v4i32 VPR128:$Rd),
5834 (v4i32 (opnode (v4i32 VPR128:$src),
5835 (v4i32 VPR128:$Rn))))],
5837 let Constraints = "$src = $Rd";
5840 def SHA1SU1 : NeonI_Cryptosha_vv<0b00, 0b00001, "sha1su1",
5841 int_arm_neon_sha1su1>;
5842 def SHA256SU0 : NeonI_Cryptosha_vv<0b00, 0b00010, "sha256su0",
5843 int_arm_neon_sha256su0>;
// Scalar-to-scalar SHA instruction (SHA1H): fixed-rotate of a single 32-bit
// FP-register value, modeled as v1i32 in and out.
5845 class NeonI_Cryptosha_ss<bits<2> size, bits<5> opcode,
5846 string asmop, SDPatternOperator opnode>
5847 : NeonI_Crypto_SHA<size, opcode,
5848 (outs FPR32:$Rd), (ins FPR32:$Rn),
5849 asmop # "\t$Rd, $Rn",
5850 [(set (v1i32 FPR32:$Rd),
5851 (v1i32 (opnode (v1i32 FPR32:$Rn))))],
5854 def SHA1H : NeonI_Cryptosha_ss<0b00, 0b00000, "sha1h", int_arm_neon_sha1h>;
// Three-vector SHA schedule-update instructions (SHA1SU0/SHA256SU1):
// .4s x .4s x .4s, accumulating into $Rd (tied to $src).
5856 class NeonI_Cryptosha3_vvv<bits<2> size, bits<3> opcode, string asmop,
5857 SDPatternOperator opnode>
5858 : NeonI_Crypto_3VSHA<size, opcode,
5860 (ins VPR128:$src, VPR128:$Rn, VPR128:$Rm),
5861 asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
5862 [(set (v4i32 VPR128:$Rd),
5863 (v4i32 (opnode (v4i32 VPR128:$src),
5865 (v4i32 VPR128:$Rm))))],
5867 let Constraints = "$src = $Rd";
5870 def SHA1SU0 : NeonI_Cryptosha3_vvv<0b00, 0b011, "sha1su0",
5871 int_arm_neon_sha1su0>;
5872 def SHA256SU1 : NeonI_Cryptosha3_vvv<0b00, 0b110, "sha256su1",
5873 int_arm_neon_sha256su1>;
// SHA-256 hash-update instructions (SHA256H/SHA256H2): Qd, Qn operands with
// a .4s vector $Rm; $src tied to $Rd.
5875 class NeonI_Cryptosha3_qqv<bits<2> size, bits<3> opcode, string asmop,
5876 SDPatternOperator opnode>
5877 : NeonI_Crypto_3VSHA<size, opcode,
5879 (ins FPR128:$src, FPR128:$Rn, VPR128:$Rm),
5880 asmop # "\t$Rd, $Rn, $Rm.4s",
5881 [(set (v4i32 FPR128:$Rd),
5882 (v4i32 (opnode (v4i32 FPR128:$src),
5884 (v4i32 VPR128:$Rm))))],
5886 let Constraints = "$src = $Rd";
5889 def SHA256H : NeonI_Cryptosha3_qqv<0b00, 0b100, "sha256h",
5890 int_arm_neon_sha256h>;
5891 def SHA256H2 : NeonI_Cryptosha3_qqv<0b00, 0b101, "sha256h2",
5892 int_arm_neon_sha256h2>;
// SHA-1 hash-update instructions (SHA1C/SHA1P/SHA1M): Qd destination,
// scalar Sn operand, .4s vector $Rm; $src tied to $Rd. These use
// AArch64-specific intrinsics (int_aarch64_neon_*), unlike the shared ARM
// intrinsics above.
5894 class NeonI_Cryptosha3_qsv<bits<2> size, bits<3> opcode, string asmop,
5895 SDPatternOperator opnode>
5896 : NeonI_Crypto_3VSHA<size, opcode,
5898 (ins FPR128:$src, FPR32:$Rn, VPR128:$Rm),
5899 asmop # "\t$Rd, $Rn, $Rm.4s",
5900 [(set (v4i32 FPR128:$Rd),
5901 (v4i32 (opnode (v4i32 FPR128:$src),
5903 (v4i32 VPR128:$Rm))))],
5905 let Constraints = "$src = $Rd";
5908 def SHA1C : NeonI_Cryptosha3_qsv<0b00, 0b000, "sha1c", int_aarch64_neon_sha1c>;
5909 def SHA1P : NeonI_Cryptosha3_qsv<0b00, 0b001, "sha1p", int_aarch64_neon_sha1p>;
5910 def SHA1M : NeonI_Cryptosha3_qsv<0b00, 0b010, "sha1m", int_aarch64_neon_sha1m>;