1 //===-- AArch64InstrNEON.td - NEON support for AArch64 -----*- tablegen -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file describes the AArch64 NEON instruction set.
12 //===----------------------------------------------------------------------===//
14 //===----------------------------------------------------------------------===//
15 // NEON-specific DAG Nodes.
16 //===----------------------------------------------------------------------===//
// Bitwise select node: one vector result and three operands, each of which
// is constrained to have the same type as the result.
def Neon_bsl : SDNode<"AArch64ISD::NEON_BSL",
                      SDTypeProfile<1, 3, [SDTCisVec<0>,
                                           SDTCisSameAs<0, 1>,
                                           SDTCisSameAs<0, 2>,
                                           SDTCisSameAs<0, 3>]>>;
// Modified-immediate moves: (outs Result), (ins Imm, OpCmode).
// Shared type profile: the result is a vector and the first operand (Imm)
// is an i32; the second operand is left unconstrained by the profile.
def SDT_Neon_movi : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVT<1, i32>]>;

// Both nodes reuse the profile above.
def Neon_movi : SDNode<"AArch64ISD::NEON_MOVIMM", SDT_Neon_movi>;
def Neon_mvni : SDNode<"AArch64ISD::NEON_MVNIMM", SDT_Neon_movi>;
// Floating-point immediate move: (outs Result), (ins Imm).
// The result is a vector; the immediate operand is carried as an i32.
def Neon_fmovi : SDNode<"AArch64ISD::NEON_FMOVIMM",
                        SDTypeProfile<1, 1, [SDTCisVec<0>,
                                             SDTCisVT<1, i32>]>>;
// Vector compare: (outs Result), (ins LHS, RHS, CondCode).
// The result is a vector; LHS and RHS must have the same type.
def Neon_cmp : SDNode<"AArch64ISD::NEON_CMP",
                      SDTypeProfile<1, 3, [SDTCisVec<0>,
                                           SDTCisSameAs<1, 2>]>>;

// Vector compare against zero:
// (outs Result), (ins LHS, 0/0.0 constant, CondCode).
// Both the result and LHS are vectors.
def Neon_cmpz : SDNode<"AArch64ISD::NEON_CMPZ",
                       SDTypeProfile<1, 3, [SDTCisVec<0>,
                                            SDTCisVec<1>]>>;

// Vector bit test: (outs Result), (ins LHS, RHS).
// The result is a vector; LHS and RHS must have the same type.
def Neon_tst : SDNode<"AArch64ISD::NEON_TST",
                      SDTypeProfile<1, 2, [SDTCisVec<0>,
                                           SDTCisSameAs<1, 2>]>>;
// Shift-by-immediate type profile and the two saturating-shift nodes that
// use it, followed by the vector duplicate node.
//
// NOTE(review): in this copy the SDTARMVSH profile ends on a trailing comma
// and the Neon_vdup profile ends after "SDTypeProfile<1, 1," -- the closing
// constraint lists are not visible. Restore them from the upstream file
// before building; do not infer the missing constraints from this text.
// NOTE(review): the def names say "qrshl" while the ISD node names are
// NEON_QSHLs / NEON_QSHLu -- confirm the naming is intentional.
44 def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
46 def Neon_sqrshlImm : SDNode<"AArch64ISD::NEON_QSHLs", SDTARMVSH>;
47 def Neon_uqrshlImm : SDNode<"AArch64ISD::NEON_QSHLu", SDTARMVSH>;
49 def Neon_vdup : SDNode<"AArch64ISD::NEON_VDUP", SDTypeProfile<1, 1,
// Duplicate-from-lane node: a vector result, a vector source operand and an
// i64 second operand (presumably the lane index -- confirm against the
// lowering code).
def Neon_vduplane : SDNode<"AArch64ISD::NEON_VDUPLANE",
                           SDTypeProfile<1, 2, [SDTCisVec<0>,
                                                SDTCisVec<1>,
                                                SDTCisVT<2, i64>]>>;
54 //===----------------------------------------------------------------------===//
56 //===----------------------------------------------------------------------===//
// NeonI_3VSame_B_sizes: three-register-same instructions that exist only in
// the byte arrangements. Produces a _8B variant on VPR64 (q = 0b0) and a
// _16B variant on VPR128 (q = 0b1); each variant takes its own pattern
// operator (opnode8B / opnode16B). `u`, `size` and `opcode` are forwarded
// unchanged to NeonI_3VSame.
//
// NOTE(review): this copy is missing lines. The parameter list never closes,
// `Commutable` is used below but not declared in the visible header, each
// def ends on "]," without its final argument / terminator, and no closing
// braces are visible. Restore from the upstream file before building.
58 multiclass NeonI_3VSame_B_sizes<bit u, bits<2> size, bits<5> opcode,
59 string asmop, SDPatternOperator opnode8B,
60 SDPatternOperator opnode16B,
63 let isCommutable = Commutable in {
64 def _8B : NeonI_3VSame<0b0, u, size, opcode,
65 (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
66 asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
67 [(set (v8i8 VPR64:$Rd),
68 (v8i8 (opnode8B (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))],
71 def _16B : NeonI_3VSame<0b1, u, size, opcode,
72 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
73 asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
74 [(set (v16i8 VPR128:$Rd),
75 (v16i8 (opnode16B (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))],
// NeonI_3VSame_HS_sizes: three-register-same instructions for the halfword
// and word arrangements. One pattern operator is shared by all variants:
//   _4H (VPR64,  size 0b01)   _8H (VPR128, size 0b01)
//   _2S (VPR64,  size 0b10)   _4S (VPR128, size 0b10)
// The first NeonI_3VSame argument is 0b0 for the 64-bit forms and 0b1 for
// the 128-bit forms.
//
// NOTE(review): this copy is missing lines. The parameter list never closes
// (`Commutable` is used but not declared in the visible header), each def
// ends on "]," without a terminator, and no closing braces are visible.
// Restore from the upstream file before building.
81 multiclass NeonI_3VSame_HS_sizes<bit u, bits<5> opcode,
82 string asmop, SDPatternOperator opnode,
85 let isCommutable = Commutable in {
86 def _4H : NeonI_3VSame<0b0, u, 0b01, opcode,
87 (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
88 asmop # "\t$Rd.4h, $Rn.4h, $Rm.4h",
89 [(set (v4i16 VPR64:$Rd),
90 (v4i16 (opnode (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))))],
93 def _8H : NeonI_3VSame<0b1, u, 0b01, opcode,
94 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
95 asmop # "\t$Rd.8h, $Rn.8h, $Rm.8h",
96 [(set (v8i16 VPR128:$Rd),
97 (v8i16 (opnode (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))))],
100 def _2S : NeonI_3VSame<0b0, u, 0b10, opcode,
101 (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
102 asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
103 [(set (v2i32 VPR64:$Rd),
104 (v2i32 (opnode (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))))],
107 def _4S : NeonI_3VSame<0b1, u, 0b10, opcode,
108 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
109 asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
110 [(set (v4i32 VPR128:$Rd),
111 (v4i32 (opnode (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))))],
// NeonI_3VSame_BHS_sizes: inherits the _4H/_8H/_2S/_4S variants from
// NeonI_3VSame_HS_sizes and adds the byte arrangements _8B (VPR64) and
// _16B (VPR128), both with size 0b00 and the same pattern operator.
//
// NOTE(review): this copy is missing lines. The parameter list never closes
// (`Commutable` is used but not declared in the visible header), each def
// ends on "]," without a terminator, and no braces around the body are
// visible. Restore from the upstream file before building.
115 multiclass NeonI_3VSame_BHS_sizes<bit u, bits<5> opcode,
116 string asmop, SDPatternOperator opnode,
118 : NeonI_3VSame_HS_sizes<u, opcode, asmop, opnode, Commutable>
120 let isCommutable = Commutable in {
121 def _8B : NeonI_3VSame<0b0, u, 0b00, opcode,
122 (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
123 asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
124 [(set (v8i8 VPR64:$Rd),
125 (v8i8 (opnode (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))],
128 def _16B : NeonI_3VSame<0b1, u, 0b00, opcode,
129 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
130 asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
131 [(set (v16i8 VPR128:$Rd),
132 (v16i8 (opnode (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))],
// NeonI_3VSame_BHSD_sizes: inherits every variant of NeonI_3VSame_BHS_sizes
// and adds the doubleword arrangement _2D (VPR128, size 0b11). There is no
// 64-bit doubleword variant in this multiclass.
//
// NOTE(review): this copy is missing lines. The parameter list never closes
// (`Commutable` is used but not declared in the visible header), the def
// ends on "]," without a terminator, and no braces around the body are
// visible. Restore from the upstream file before building.
137 multiclass NeonI_3VSame_BHSD_sizes<bit u, bits<5> opcode,
138 string asmop, SDPatternOperator opnode,
140 : NeonI_3VSame_BHS_sizes<u, opcode, asmop, opnode, Commutable>
142 let isCommutable = Commutable in {
143 def _2D : NeonI_3VSame<0b1, u, 0b11, opcode,
144 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
145 asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
146 [(set (v2i64 VPR128:$Rd),
147 (v2i64 (opnode (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))))],
// Produces the _2S (VPR64), _4S (VPR128) and _2D (VPR128) variants of a
// floating-point three-register-same instruction. Source operands are always
// typed v2f32 / v4f32 / v2f64; the result types are supplied per variant
// (ResTy2S / ResTy4S / ResTy2D), and each variant has its own pattern
// operator. The two-bit size field is built as {size, 0b0} for the
// single-precision variants and {size, 0b1} for _2D.
//
// NOTE(review): this copy is missing lines. Each def ends on "]," without a
// terminator and no braces around the multiclass body are visible. Restore
// from the upstream file before building.
152 // Multiclass NeonI_3VSame_SD_sizes: Operand types are floating point types,
153 // but Result types can be integer or floating point types.
154 multiclass NeonI_3VSame_SD_sizes<bit u, bit size, bits<5> opcode,
155 string asmop, SDPatternOperator opnode2S,
156 SDPatternOperator opnode4S,
157 SDPatternOperator opnode2D,
158 ValueType ResTy2S, ValueType ResTy4S,
159 ValueType ResTy2D, bit Commutable = 0>
161 let isCommutable = Commutable in {
162 def _2S : NeonI_3VSame<0b0, u, {size, 0b0}, opcode,
163 (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
164 asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
165 [(set (ResTy2S VPR64:$Rd),
166 (ResTy2S (opnode2S (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))))],
169 def _4S : NeonI_3VSame<0b1, u, {size, 0b0}, opcode,
170 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
171 asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
172 [(set (ResTy4S VPR128:$Rd),
173 (ResTy4S (opnode4S (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))))],
176 def _2D : NeonI_3VSame<0b1, u, {size, 0b1}, opcode,
177 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
178 asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
179 [(set (ResTy2D VPR128:$Rd),
180 (ResTy2D (opnode2D (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))))],
185 //===----------------------------------------------------------------------===//
186 // Instruction Definitions
187 //===----------------------------------------------------------------------===//
// Vector Arithmetic Instructions

// Vector Add (Integer and Floating-Point). Both are marked commutable.
defm ADDvvv  : NeonI_3VSame_BHSD_sizes<0b0, 0b10000, "add", add, 1>;
defm FADDvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11010, "fadd",
                                     fadd, fadd, fadd,
                                     v2f32, v4f32, v2f64, 1>;

// Vector Sub (Integer and Floating-Point). Not commutable.
defm SUBvvv  : NeonI_3VSame_BHSD_sizes<0b1, 0b10000, "sub", sub, 0>;
defm FSUBvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11010, "fsub",
                                     fsub, fsub, fsub,
                                     v2f32, v4f32, v2f64, 0>;

// Vector Multiply (Integer and Floating-Point). The integer form is
// instantiated for the B/H/S arrangements only; both are commutable.
defm MULvvv  : NeonI_3VSame_BHS_sizes<0b0, 0b10011, "mul", mul, 1>;
defm FMULvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11011, "fmul",
                                     fmul, fmul, fmul,
                                     v2f32, v4f32, v2f64, 1>;

// Vector Multiply (Polynomial). Byte arrangements only; selected from the
// int_arm_neon_vmulp intrinsic for both the 8B and 16B forms.
defm PMULvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b10011, "pmul",
                                    int_arm_neon_vmulp, int_arm_neon_vmulp,
                                    1>;
// Three-register-same instruction whose destination is also an input.
// The instruction takes an extra $src operand that is tied to $Rd through
// the Constraints string, and the selection pattern applies `opnode` to
// ($src, $Rn, $Rm), all typed as OpTy. The register class (VPRC), value type
// and the lane suffix used in the asm string (asmlane, e.g. ".8b") are
// supplied by the caller rather than fixed per variant.
//
// NOTE(review): this copy is missing lines. The NeonI_3VSame argument list
// ends on "]," with no terminator, and the braces around the
// `let Constraints` body are not visible. Restore from the upstream file
// before building.
214 // Vector Multiply-accumulate and Multiply-subtract (Integer)
216 // class NeonI_3VSame_Constraint_impl: NeonI_3VSame with no data type and
217 // two operands constraints.
218 class NeonI_3VSame_Constraint_impl<string asmop, string asmlane,
219 RegisterOperand VPRC, ValueType OpTy, bit q, bit u, bits<2> size,
220 bits<5> opcode, SDPatternOperator opnode>
221 : NeonI_3VSame<q, u, size, opcode,
222 (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, VPRC:$Rm),
223 asmop # "\t$Rd" # asmlane # ", $Rn" # asmlane # ", $Rm" # asmlane,
224 [(set (OpTy VPRC:$Rd),
225 (OpTy (opnode (OpTy VPRC:$src), (OpTy VPRC:$Rn), (OpTy VPRC:$Rm))))],
227 let Constraints = "$src = $Rd";
// Integer multiply-accumulate fragment: acc + (lhs * rhs).
def Neon_mla : PatFrag<(ops node:$acc, node:$lhs, node:$rhs),
                       (add node:$acc, (mul node:$lhs, node:$rhs))>;

// Integer multiply-subtract fragment: acc - (lhs * rhs).
def Neon_mls : PatFrag<(ops node:$acc, node:$lhs, node:$rhs),
                       (sub node:$acc, (mul node:$lhs, node:$rhs))>;
// Integer MLA: accumulator-tied three-register form, opcode 0b10010, u = 0.
// Arguments after the value type are q, u, size, opcode, pattern operator.
def MLAvvv_8B  : NeonI_3VSame_Constraint_impl<"mla", ".8b",  VPR64,  v8i8,
                                              0b0, 0b0, 0b00, 0b10010,
                                              Neon_mla>;
def MLAvvv_16B : NeonI_3VSame_Constraint_impl<"mla", ".16b", VPR128, v16i8,
                                              0b1, 0b0, 0b00, 0b10010,
                                              Neon_mla>;
def MLAvvv_4H  : NeonI_3VSame_Constraint_impl<"mla", ".4h",  VPR64,  v4i16,
                                              0b0, 0b0, 0b01, 0b10010,
                                              Neon_mla>;
def MLAvvv_8H  : NeonI_3VSame_Constraint_impl<"mla", ".8h",  VPR128, v8i16,
                                              0b1, 0b0, 0b01, 0b10010,
                                              Neon_mla>;
def MLAvvv_2S  : NeonI_3VSame_Constraint_impl<"mla", ".2s",  VPR64,  v2i32,
                                              0b0, 0b0, 0b10, 0b10010,
                                              Neon_mla>;
def MLAvvv_4S  : NeonI_3VSame_Constraint_impl<"mla", ".4s",  VPR128, v4i32,
                                              0b1, 0b0, 0b10, 0b10010,
                                              Neon_mla>;

// Integer MLS: same opcode as MLA, distinguished by u = 1.
def MLSvvv_8B  : NeonI_3VSame_Constraint_impl<"mls", ".8b",  VPR64,  v8i8,
                                              0b0, 0b1, 0b00, 0b10010,
                                              Neon_mls>;
def MLSvvv_16B : NeonI_3VSame_Constraint_impl<"mls", ".16b", VPR128, v16i8,
                                              0b1, 0b1, 0b00, 0b10010,
                                              Neon_mls>;
def MLSvvv_4H  : NeonI_3VSame_Constraint_impl<"mls", ".4h",  VPR64,  v4i16,
                                              0b0, 0b1, 0b01, 0b10010,
                                              Neon_mls>;
def MLSvvv_8H  : NeonI_3VSame_Constraint_impl<"mls", ".8h",  VPR128, v8i16,
                                              0b1, 0b1, 0b01, 0b10010,
                                              Neon_mls>;
def MLSvvv_2S  : NeonI_3VSame_Constraint_impl<"mls", ".2s",  VPR64,  v2i32,
                                              0b0, 0b1, 0b10, 0b10010,
                                              Neon_mls>;
def MLSvvv_4S  : NeonI_3VSame_Constraint_impl<"mls", ".4s",  VPR128, v4i32,
                                              0b1, 0b1, 0b10, 0b10010,
                                              Neon_mls>;
// Vector Multiply-accumulate and Multiply-subtract (Floating Point)

// Unfused form of multiply-accumulate: acc + (lhs * rhs).
def Neon_fmla : PatFrag<(ops node:$acc, node:$lhs, node:$rhs),
                        (fadd node:$acc, (fmul node:$lhs, node:$rhs))>;

// Unfused form of multiply-subtract: acc - (lhs * rhs).
def Neon_fmls : PatFrag<(ops node:$acc, node:$lhs, node:$rhs),
                        (fsub node:$acc, (fmul node:$lhs, node:$rhs))>;
// FMLA / FMLS instruction definitions. Their fadd+fmul / fsub+fmul selection
// patterns are guarded by both HasNEON and UseFusedMAC. All six defs share
// opcode 0b11001 and u = 0; FMLA uses size 0b00 (single) / 0b01 (double) and
// FMLS uses size 0b10 (single) / 0b11 (double).
//
// NOTE(review): the closing brace of this `let ... in {` block is not
// visible in this copy, so the extent of the predicate cannot be confirmed
// from here. Restore from the upstream file before building.
271 let Predicates = [HasNEON, UseFusedMAC] in {
272 def FMLAvvv_2S: NeonI_3VSame_Constraint_impl<"fmla", ".2s", VPR64, v2f32,
273 0b0, 0b0, 0b00, 0b11001, Neon_fmla>;
274 def FMLAvvv_4S: NeonI_3VSame_Constraint_impl<"fmla", ".4s", VPR128, v4f32,
275 0b1, 0b0, 0b00, 0b11001, Neon_fmla>;
276 def FMLAvvv_2D: NeonI_3VSame_Constraint_impl<"fmla", ".2d", VPR128, v2f64,
277 0b1, 0b0, 0b01, 0b11001, Neon_fmla>;
279 def FMLSvvv_2S: NeonI_3VSame_Constraint_impl<"fmls", ".2s", VPR64, v2f32,
280 0b0, 0b0, 0b10, 0b11001, Neon_fmls>;
281 def FMLSvvv_4S: NeonI_3VSame_Constraint_impl<"fmls", ".4s", VPR128, v4f32,
282 0b1, 0b0, 0b10, 0b11001, Neon_fmls>;
283 def FMLSvvv_2D: NeonI_3VSame_Constraint_impl<"fmls", ".2d", VPR128, v2f64,
284 0b1, 0b0, 0b11, 0b11001, Neon_fmls>;
// The generic fma node may also be selected to FMLA/FMLS. The original note
// says this is allowed "regardless of compile ..." (the sentence is cut off
// in this copy of the file).
//
// fma(Rn, Rm, Ra) computes Rn * Rm + Ra; the addend Ra becomes the first
// (accumulator) operand of the instruction.
def : Pat<(v2f32 (fma VPR64:$Rn, VPR64:$Rm, VPR64:$Ra)),
          (FMLAvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>;
def : Pat<(v4f32 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)),
          (FMLAvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
def : Pat<(v2f64 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)),
          (FMLAvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;

// A negated first multiplicand selects the subtracting form instead.
def : Pat<(v2f32 (fma (fneg VPR64:$Rn), VPR64:$Rm, VPR64:$Ra)),
          (FMLSvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>;
def : Pat<(v4f32 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)),
          (FMLSvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
def : Pat<(v2f64 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)),
          (FMLSvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
// Vector Divide (Floating-Point). Not commutable.
defm FDIVvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11111, "fdiv",
                                     fdiv, fdiv, fdiv,
                                     v2f32, v4f32, v2f64, 0>;

// Vector Bitwise Operations
// AND, EOR and ORR share opcode 0b00011 and differ in the u and size fields.

// Vector Bitwise AND
defm ANDvvv : NeonI_3VSame_B_sizes<0b0, 0b00, 0b00011, "and", and, and, 1>;

// Vector Bitwise Exclusive OR
defm EORvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b00011, "eor", xor, xor, 1>;

// Vector Bitwise OR
defm ORRvvv : NeonI_3VSame_B_sizes<0b0, 0b10, 0b00011, "orr", or, or, 1>;
// Vector Move - register.
// "mov Vd, Vn" is an alias for ORR with both source registers equal
// (Vn == Vm): the single $Rn of the alias is used for both ORR sources.
// FIXME: This is actually the preferred syntax but TableGen can't deal with
// custom printing of aliases.
def : NeonInstAlias<"mov $Rd.8b, $Rn.8b",
                    (ORRvvv_8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rn),
                    0>;
def : NeonInstAlias<"mov $Rd.16b, $Rn.16b",
                    (ORRvvv_16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rn),
                    0>;
// Pattern leaves that match a Neon_movi node whose decoded immediate is an
// all-ones (0xff) or all-zeros (0x0) value with 8-bit elements. The node's
// two constant operands (Imm, OpCmode) are decoded with
// A64Imms::decodeNeonModImm, which also reports the element width through
// EltBits.
//
// NOTE(review): this copy is missing lines. `EltBits` is used but its
// declaration is not visible, and neither predicate body has a visible
// closing "}]>;". Restore from the upstream file before building.
333 def Neon_immAllOnes: PatLeaf<(Neon_movi (i32 timm), (i32 imm)), [{
334 ConstantSDNode *ImmConstVal = cast<ConstantSDNode>(N->getOperand(0));
335 ConstantSDNode *OpCmodeConstVal = cast<ConstantSDNode>(N->getOperand(1));
337 uint64_t EltVal = A64Imms::decodeNeonModImm(ImmConstVal->getZExtValue(),
338 OpCmodeConstVal->getZExtValue(), EltBits);
339 return (EltBits == 8 && EltVal == 0xff);
342 def Neon_immAllZeros: PatLeaf<(Neon_movi (i32 timm), (i32 imm)), [{
343 ConstantSDNode *ImmConstVal = cast<ConstantSDNode>(N->getOperand(0));
344 ConstantSDNode *OpCmodeConstVal = cast<ConstantSDNode>(N->getOperand(1));
346 uint64_t EltVal = A64Imms::decodeNeonModImm(ImmConstVal->getZExtValue(),
347 OpCmodeConstVal->getZExtValue(), EltBits);
348 return (EltBits == 8 && EltVal == 0x0);
// Bitwise NOT, expressed as XOR with an all-ones vector. The all-ones leaf
// is typed as a byte vector and bitconverted to the operand's type.
def Neon_not8B  : PatFrag<(ops node:$val),
                          (xor node:$val,
                               (bitconvert (v8i8 Neon_immAllOnes)))>;
def Neon_not16B : PatFrag<(ops node:$val),
                          (xor node:$val,
                               (bitconvert (v16i8 Neon_immAllOnes)))>;

// OR-NOT: lhs | ~rhs.
def Neon_orn8B  : PatFrag<(ops node:$lhs, node:$rhs),
                          (or node:$lhs, (Neon_not8B node:$rhs))>;
def Neon_orn16B : PatFrag<(ops node:$lhs, node:$rhs),
                          (or node:$lhs, (Neon_not16B node:$rhs))>;

// Bit clear (AND-NOT): lhs & ~rhs.
def Neon_bic8B  : PatFrag<(ops node:$lhs, node:$rhs),
                          (and node:$lhs, (Neon_not8B node:$rhs))>;
def Neon_bic16B : PatFrag<(ops node:$lhs, node:$rhs),
                          (and node:$lhs, (Neon_not16B node:$rhs))>;
// Vector Bitwise OR NOT - register (size field 0b11). Not commutable.
defm ORNvvv : NeonI_3VSame_B_sizes<0b0, 0b11, 0b00011, "orn",
                                   Neon_orn8B, Neon_orn16B,
                                   0>;

// Vector Bitwise Bit Clear (AND NOT) - register (size field 0b01).
// Not commutable.
defm BICvvv : NeonI_3VSame_B_sizes<0b0, 0b01, 0b00011, "bic",
                                   Neon_bic8B, Neon_bic16B,
                                   0>;
// Extra selection patterns for two-operand bitwise instructions. The
// instruction defs carry patterns for the byte vector types only; these
// patterns map the same operation on the other integer vector types onto the
// byte-arrangement instruction: v2i32 / v4i16 / v1i64 onto INST8B and
// v4i32 / v8i16 / v2i64 onto INST16B.
//
// NOTE(review): this copy is missing lines. `INST8B` is used below but is
// not declared in the visible parameter list, and the closing brace of the
// multiclass is not visible. Restore from the upstream file before building.
380 multiclass Neon_bitwise2V_patterns<SDPatternOperator opnode8B,
381 SDPatternOperator opnode16B,
383 Instruction INST16B> {
384 def : Pat<(v2i32 (opnode8B VPR64:$Rn, VPR64:$Rm)),
385 (INST8B VPR64:$Rn, VPR64:$Rm)>;
386 def : Pat<(v4i16 (opnode8B VPR64:$Rn, VPR64:$Rm)),
387 (INST8B VPR64:$Rn, VPR64:$Rm)>;
388 def : Pat<(v1i64 (opnode8B VPR64:$Rn, VPR64:$Rm)),
389 (INST8B VPR64:$Rn, VPR64:$Rm)>;
390 def : Pat<(v4i32 (opnode16B VPR128:$Rn, VPR128:$Rm)),
391 (INST16B VPR128:$Rn, VPR128:$Rm)>;
392 def : Pat<(v8i16 (opnode16B VPR128:$Rn, VPR128:$Rm)),
393 (INST16B VPR128:$Rn, VPR128:$Rm)>;
394 def : Pat<(v2i64 (opnode16B VPR128:$Rn, VPR128:$Rm)),
395 (INST16B VPR128:$Rn, VPR128:$Rm)>;
// Additional patterns for the bitwise instructions AND, ORR, EOR, BIC and
// ORN, so the non-byte integer vector types select the same instructions.
defm : Neon_bitwise2V_patterns<and, and,
                               ANDvvv_8B, ANDvvv_16B>;
defm : Neon_bitwise2V_patterns<or, or,
                               ORRvvv_8B, ORRvvv_16B>;
defm : Neon_bitwise2V_patterns<xor, xor,
                               EORvvv_8B, EORvvv_16B>;
defm : Neon_bitwise2V_patterns<Neon_bic8B, Neon_bic16B,
                               BICvvv_8B, BICvvv_16B>;
defm : Neon_bitwise2V_patterns<Neon_orn8B, Neon_orn16B,
                               ORNvvv_8B, ORNvvv_16B>;
// Vector Bitwise Select. The destination is also the first source operand
// (tied by NeonI_3VSame_Constraint_impl). Encoded with u = 1, size = 0b01,
// opcode = 0b00011; selected from the Neon_bsl node.
def BSLvvv_8B  : NeonI_3VSame_Constraint_impl<"bsl", ".8b",  VPR64,  v8i8,
                                              0b0, 0b1, 0b01, 0b00011,
                                              Neon_bsl>;
def BSLvvv_16B : NeonI_3VSame_Constraint_impl<"bsl", ".16b", VPR128, v16i8,
                                              0b1, 0b1, 0b01, 0b00011,
                                              Neon_bsl>;
// Extra selection patterns for three-operand bitwise-select style
// instructions. Three groups are emitted:
//   1. `opnode` on the non-byte integer vector types, mapped onto the
//      byte-arrangement instruction (INST8B for 64-bit, INST16B for 128-bit).
//   2. The open-coded select (Rn & Rd) | (Rm & ~Rd) on every integer vector
//      type, with Rd passed as the instruction's first (tied) operand.
//   3. The int_arm_neon_vbsl intrinsic on integer and floating-point vector
//      types.
//
// NOTE(review): this copy is missing lines. `INST8B` is used below but is
// not declared in the visible parameter list, and the closing brace of the
// multiclass is not visible. Restore from the upstream file before building.
412 multiclass Neon_bitwise3V_patterns<SDPatternOperator opnode,
414 Instruction INST16B> {
415 // Disassociate type from instruction definition
416 def : Pat<(v2i32 (opnode VPR64:$src,VPR64:$Rn, VPR64:$Rm)),
417 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
418 def : Pat<(v4i16 (opnode VPR64:$src, VPR64:$Rn, VPR64:$Rm)),
419 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
420 def : Pat<(v1i64 (opnode VPR64:$src, VPR64:$Rn, VPR64:$Rm)),
421 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
422 def : Pat<(v4i32 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)),
423 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
424 def : Pat<(v8i16 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)),
425 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
426 def : Pat<(v2i64 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)),
427 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
429 // Allow to match BSL instruction pattern with non-constant operand
430 def : Pat<(v8i8 (or (and VPR64:$Rn, VPR64:$Rd),
431 (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
432 (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
433 def : Pat<(v4i16 (or (and VPR64:$Rn, VPR64:$Rd),
434 (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
435 (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
436 def : Pat<(v2i32 (or (and VPR64:$Rn, VPR64:$Rd),
437 (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
438 (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
439 def : Pat<(v1i64 (or (and VPR64:$Rn, VPR64:$Rd),
440 (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
441 (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
442 def : Pat<(v16i8 (or (and VPR128:$Rn, VPR128:$Rd),
443 (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
444 (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
445 def : Pat<(v8i16 (or (and VPR128:$Rn, VPR128:$Rd),
446 (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
447 (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
448 def : Pat<(v4i32 (or (and VPR128:$Rn, VPR128:$Rd),
449 (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
450 (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
451 def : Pat<(v2i64 (or (and VPR128:$Rn, VPR128:$Rd),
452 (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
453 (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
455 // Allow to match llvm.arm.* intrinsics.
456 def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 VPR64:$src),
457 (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))),
458 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
459 def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 VPR64:$src),
460 (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))),
461 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
462 def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 VPR64:$src),
463 (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))),
464 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
465 def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 VPR64:$src),
466 (v1i64 VPR64:$Rn), (v1i64 VPR64:$Rm))),
467 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
468 def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 VPR64:$src),
469 (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))),
470 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
471 def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 VPR128:$src),
472 (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))),
473 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
474 def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 VPR128:$src),
475 (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))),
476 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
477 def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 VPR128:$src),
478 (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))),
479 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
480 def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 VPR128:$src),
481 (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))),
482 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
483 def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 VPR128:$src),
484 (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))),
485 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
486 def : Pat<(v2f64 (int_arm_neon_vbsl (v2f64 VPR128:$src),
487 (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))),
488 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
// Additional patterns for the bitwise instruction BSL.
defm : Neon_bitwise3V_patterns<Neon_bsl, BSLvvv_8B, BSLvvv_16B>;

// A Neon_bsl-shaped fragment whose predicate always returns false, so it
// never matches anything. The BIT and BIF definitions in this file pass it
// as their pattern operator.
def Neon_NoBSLop : PatFrag<(ops node:$src, node:$Rn, node:$Rm),
                           (Neon_bsl node:$src, node:$Rn, node:$Rm),
                           [{ (void)N; return false; }]>;
// Vector Bitwise Insert if True (size field 0b10). Uses the never-matching
// Neon_NoBSLop operator as its pattern.
def BITvvv_8B  : NeonI_3VSame_Constraint_impl<"bit", ".8b",  VPR64,  v8i8,
                                              0b0, 0b1, 0b10, 0b00011,
                                              Neon_NoBSLop>;
def BITvvv_16B : NeonI_3VSame_Constraint_impl<"bit", ".16b", VPR128, v16i8,
                                              0b1, 0b1, 0b10, 0b00011,
                                              Neon_NoBSLop>;

// Vector Bitwise Insert if False (size field 0b11). Same pattern operator.
def BIFvvv_8B  : NeonI_3VSame_Constraint_impl<"bif", ".8b",  VPR64,  v8i8,
                                              0b0, 0b1, 0b11, 0b00011,
                                              Neon_NoBSLop>;
def BIFvvv_16B : NeonI_3VSame_Constraint_impl<"bif", ".16b", VPR128, v16i8,
                                              0b1, 0b1, 0b11, 0b00011,
                                              Neon_NoBSLop>;
// Vector Absolute Difference and Accumulate (Signed, Unsigned).
// Both fragments add the absolute-difference intrinsic of (lhs, rhs) to acc.
def Neon_uaba : PatFrag<(ops node:$acc, node:$lhs, node:$rhs),
                        (add node:$acc,
                             (int_arm_neon_vabdu node:$lhs, node:$rhs))>;
def Neon_saba : PatFrag<(ops node:$acc, node:$lhs, node:$rhs),
                        (add node:$acc,
                             (int_arm_neon_vabds node:$lhs, node:$rhs))>;
// Vector Absolute Difference and Accumulate (Unsigned): u = 1,
// opcode 0b01111, B/H/S arrangements.
def UABAvvv_8B  : NeonI_3VSame_Constraint_impl<"uaba", ".8b",  VPR64,  v8i8,
                                               0b0, 0b1, 0b00, 0b01111,
                                               Neon_uaba>;
def UABAvvv_16B : NeonI_3VSame_Constraint_impl<"uaba", ".16b", VPR128, v16i8,
                                               0b1, 0b1, 0b00, 0b01111,
                                               Neon_uaba>;
def UABAvvv_4H  : NeonI_3VSame_Constraint_impl<"uaba", ".4h",  VPR64,  v4i16,
                                               0b0, 0b1, 0b01, 0b01111,
                                               Neon_uaba>;
def UABAvvv_8H  : NeonI_3VSame_Constraint_impl<"uaba", ".8h",  VPR128, v8i16,
                                               0b1, 0b1, 0b01, 0b01111,
                                               Neon_uaba>;
def UABAvvv_2S  : NeonI_3VSame_Constraint_impl<"uaba", ".2s",  VPR64,  v2i32,
                                               0b0, 0b1, 0b10, 0b01111,
                                               Neon_uaba>;
def UABAvvv_4S  : NeonI_3VSame_Constraint_impl<"uaba", ".4s",  VPR128, v4i32,
                                               0b1, 0b1, 0b10, 0b01111,
                                               Neon_uaba>;

// Vector Absolute Difference and Accumulate (Signed): same opcode, u = 0.
def SABAvvv_8B  : NeonI_3VSame_Constraint_impl<"saba", ".8b",  VPR64,  v8i8,
                                               0b0, 0b0, 0b00, 0b01111,
                                               Neon_saba>;
def SABAvvv_16B : NeonI_3VSame_Constraint_impl<"saba", ".16b", VPR128, v16i8,
                                               0b1, 0b0, 0b00, 0b01111,
                                               Neon_saba>;
def SABAvvv_4H  : NeonI_3VSame_Constraint_impl<"saba", ".4h",  VPR64,  v4i16,
                                               0b0, 0b0, 0b01, 0b01111,
                                               Neon_saba>;
def SABAvvv_8H  : NeonI_3VSame_Constraint_impl<"saba", ".8h",  VPR128, v8i16,
                                               0b1, 0b0, 0b01, 0b01111,
                                               Neon_saba>;
def SABAvvv_2S  : NeonI_3VSame_Constraint_impl<"saba", ".2s",  VPR64,  v2i32,
                                               0b0, 0b0, 0b10, 0b01111,
                                               Neon_saba>;
def SABAvvv_4S  : NeonI_3VSame_Constraint_impl<"saba", ".4s",  VPR128, v4i32,
                                               0b1, 0b0, 0b10, 0b01111,
                                               Neon_saba>;
// Vector Absolute Difference (Signed, Unsigned): opcode 0b01110,
// u = 1 for unsigned and u = 0 for signed. Not marked commutable.
defm UABDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01110, "uabd",
                                      int_arm_neon_vabdu, 0>;
defm SABDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01110, "sabd",
                                      int_arm_neon_vabds, 0>;

// Vector Absolute Difference (Floating Point). Selected from the same
// int_arm_neon_vabds intrinsic as SABD, here on floating-point vector types.
defm FABDvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11010, "fabd",
                                     int_arm_neon_vabds,
                                     int_arm_neon_vabds,
                                     int_arm_neon_vabds,
                                     v2f32, v4f32, v2f64, 0>;
// NOTE(review): NeonI_3VSame_SD_sizes takes three pattern operators before
// the three result types, but only two int_arm_neon_vrecps arguments are
// visible here -- a line appears to be missing from this copy. Restore from
// the upstream file before building.
557 // Vector Reciprocal Step (Floating Point)
558 defm FRECPSvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11111, "frecps",
559 int_arm_neon_vrecps, int_arm_neon_vrecps,
561 v2f32, v4f32, v2f64, 0>;
// Vector Reciprocal Square Root Step (Floating Point). All three
// arrangements are selected from int_arm_neon_vrsqrts; not commutable.
defm FRSQRTSvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11111, "frsqrts",
                                        int_arm_neon_vrsqrts,
                                        int_arm_neon_vrsqrts,
                                        int_arm_neon_vrsqrts,
                                        v2f32, v4f32, v2f64,
                                        0>;
// Vector Comparisons
//
// Each fragment fixes the condition-code operand of Neon_cmp:
//   cmeq -> SETEQ    cmphs -> SETUGE   cmge -> SETGE
//   cmhi -> SETUGT   cmgt  -> SETGT
def Neon_cmeq  : PatFrag<(ops node:$x, node:$y),
                         (Neon_cmp node:$x, node:$y, SETEQ)>;
def Neon_cmphs : PatFrag<(ops node:$x, node:$y),
                         (Neon_cmp node:$x, node:$y, SETUGE)>;
def Neon_cmge  : PatFrag<(ops node:$x, node:$y),
                         (Neon_cmp node:$x, node:$y, SETGE)>;
def Neon_cmhi  : PatFrag<(ops node:$x, node:$y),
                         (Neon_cmp node:$x, node:$y, SETUGT)>;
def Neon_cmgt  : PatFrag<(ops node:$x, node:$y),
                         (Neon_cmp node:$x, node:$y, SETGT)>;
// The alias result passes $Rm and $Rn to `inst` in swapped order, which is
// how a "less than" mnemonic is expressed through the corresponding
// "greater than" instruction.
//
// NOTE(review): the asm string expression ends on a dangling "#" in this
// copy -- the line that completes it is not visible. Restore from the
// upstream file before building.
583 // NeonI_compare_aliases class: swaps register operands to implement
584 // comparison aliases, e.g., CMLE is alias for CMGE with operands reversed.
585 class NeonI_compare_aliases<string asmop, string asmlane,
586 Instruction inst, RegisterOperand VPRC>
587 : NeonInstAlias<asmop # "\t$Rd" # asmlane #", $Rn" # asmlane #
589 (inst VPRC:$Rd, VPRC:$Rm, VPRC:$Rn), 0b0>;
// NOTE(review): the closing brace of this `let ... in {` block is not
// visible in this copy. Also note the defm passes Commutable = 0 while the
// enclosing `let` sets isCommutable = 1 -- confirm which setting takes
// effect on the generated records.
591 // Vector Comparisons (Integer)
593 // Vector Compare Mask Equal (Integer)
594 let isCommutable =1 in {
595 defm CMEQvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b10001, "cmeq", Neon_cmeq, 0>;
// Vector Compare Mask Higher or Same (Unsigned Integer)
defm CMHSvvv  : NeonI_3VSame_BHSD_sizes<0b1, 0b00111, "cmhs",
                                        Neon_cmphs, 0>;

// Vector Compare Mask Greater Than or Equal (Integer)
defm CMGEvvv  : NeonI_3VSame_BHSD_sizes<0b0, 0b00111, "cmge",
                                        Neon_cmge, 0>;

// Vector Compare Mask Higher (Unsigned Integer)
defm CMHIvvv  : NeonI_3VSame_BHSD_sizes<0b1, 0b00110, "cmhi",
                                        Neon_cmhi, 0>;

// Vector Compare Mask Greater Than (Integer)
defm CMGTvvv  : NeonI_3VSame_BHSD_sizes<0b0, 0b00110, "cmgt",
                                        Neon_cmgt, 0>;

// Vector Compare Mask Bitwise Test (Integer)
defm CMTSTvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b10001, "cmtst",
                                        Neon_tst, 0>;
// The four "less" comparisons below have no instruction of their own: each
// is an assembler alias for the opposite comparison with the two source
// registers exchanged.

// Vector Compare Mask Less or Same (Unsigned Integer): CMLS -> CMHS.
def CMLSvvv_8B  : NeonI_compare_aliases<"cmls", ".8b",  CMHSvvv_8B,  VPR64>;
def CMLSvvv_16B : NeonI_compare_aliases<"cmls", ".16b", CMHSvvv_16B, VPR128>;
def CMLSvvv_4H  : NeonI_compare_aliases<"cmls", ".4h",  CMHSvvv_4H,  VPR64>;
def CMLSvvv_8H  : NeonI_compare_aliases<"cmls", ".8h",  CMHSvvv_8H,  VPR128>;
def CMLSvvv_2S  : NeonI_compare_aliases<"cmls", ".2s",  CMHSvvv_2S,  VPR64>;
def CMLSvvv_4S  : NeonI_compare_aliases<"cmls", ".4s",  CMHSvvv_4S,  VPR128>;
def CMLSvvv_2D  : NeonI_compare_aliases<"cmls", ".2d",  CMHSvvv_2D,  VPR128>;

// Vector Compare Mask Less Than or Equal (Integer): CMLE -> CMGE.
def CMLEvvv_8B  : NeonI_compare_aliases<"cmle", ".8b",  CMGEvvv_8B,  VPR64>;
def CMLEvvv_16B : NeonI_compare_aliases<"cmle", ".16b", CMGEvvv_16B, VPR128>;
def CMLEvvv_4H  : NeonI_compare_aliases<"cmle", ".4h",  CMGEvvv_4H,  VPR64>;
def CMLEvvv_8H  : NeonI_compare_aliases<"cmle", ".8h",  CMGEvvv_8H,  VPR128>;
def CMLEvvv_2S  : NeonI_compare_aliases<"cmle", ".2s",  CMGEvvv_2S,  VPR64>;
def CMLEvvv_4S  : NeonI_compare_aliases<"cmle", ".4s",  CMGEvvv_4S,  VPR128>;
def CMLEvvv_2D  : NeonI_compare_aliases<"cmle", ".2d",  CMGEvvv_2D,  VPR128>;

// Vector Compare Mask Lower (Unsigned Integer): CMLO -> CMHI.
def CMLOvvv_8B  : NeonI_compare_aliases<"cmlo", ".8b",  CMHIvvv_8B,  VPR64>;
def CMLOvvv_16B : NeonI_compare_aliases<"cmlo", ".16b", CMHIvvv_16B, VPR128>;
def CMLOvvv_4H  : NeonI_compare_aliases<"cmlo", ".4h",  CMHIvvv_4H,  VPR64>;
def CMLOvvv_8H  : NeonI_compare_aliases<"cmlo", ".8h",  CMHIvvv_8H,  VPR128>;
def CMLOvvv_2S  : NeonI_compare_aliases<"cmlo", ".2s",  CMHIvvv_2S,  VPR64>;
def CMLOvvv_4S  : NeonI_compare_aliases<"cmlo", ".4s",  CMHIvvv_4S,  VPR128>;
def CMLOvvv_2D  : NeonI_compare_aliases<"cmlo", ".2d",  CMHIvvv_2D,  VPR128>;

// Vector Compare Mask Less Than (Integer): CMLT -> CMGT.
def CMLTvvv_8B  : NeonI_compare_aliases<"cmlt", ".8b",  CMGTvvv_8B,  VPR64>;
def CMLTvvv_16B : NeonI_compare_aliases<"cmlt", ".16b", CMGTvvv_16B, VPR128>;
def CMLTvvv_4H  : NeonI_compare_aliases<"cmlt", ".4h",  CMGTvvv_4H,  VPR64>;
def CMLTvvv_8H  : NeonI_compare_aliases<"cmlt", ".8h",  CMGTvvv_8H,  VPR128>;
def CMLTvvv_2S  : NeonI_compare_aliases<"cmlt", ".2s",  CMGTvvv_2S,  VPR64>;
def CMLTvvv_4S  : NeonI_compare_aliases<"cmlt", ".4s",  CMGTvvv_4S,  VPR128>;
def CMLTvvv_2D  : NeonI_compare_aliases<"cmlt", ".2d",  CMGTvvv_2D,  VPR128>;
// Assembly operand class and operand definition for the literal zero used by
// the compare-against-zero instructions. The operand is an i32 immediate
// leaf that only matches the value 0; parsing goes through
// neon_uimm0_asmoperand (predicate isUImm<0>, rendered with addImmOperands)
// and printing through printNeonUImm0Operand.
//
// NOTE(review): this copy is missing lines. The AsmOperandClass def has no
// visible opening brace (and any fields before PredicateMethod are not
// shown), and neither def has a visible closing brace. Restore from the
// upstream file before building.
654 def neon_uimm0_asmoperand : AsmOperandClass
657 let PredicateMethod = "isUImm<0>";
658 let RenderMethod = "addImmOperands";
661 def neon_uimm0 : Operand<i32>, ImmLeaf<i32, [{return Imm == 0;}]> {
662 let ParserMatchClass = neon_uimm0_asmoperand;
663 let PrintMethod = "printNeonUImm0Operand";
// NeonI_cmpz_sizes: integer compare-against-zero instructions in all seven
// arrangements (_8B, _16B, _4H, _8H, _2S, _4S, _2D). Each variant is a
// NeonI_2VMisc instruction taking one vector register plus the neon_uimm0
// immediate, and is selected from Neon_cmpz with the condition code CC.
// The size field is 0b00 / 0b01 / 0b10 / 0b11 for B / H / S / D.
//
// NOTE(review): this copy is missing lines. Each def ends on "]," without a
// terminator and the braces around the multiclass body are not visible.
// Restore from the upstream file before building.
667 multiclass NeonI_cmpz_sizes<bit u, bits<5> opcode, string asmop, CondCode CC>
669 def _8B : NeonI_2VMisc<0b0, u, 0b00, opcode,
670 (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
671 asmop # "\t$Rd.8b, $Rn.8b, $Imm",
672 [(set (v8i8 VPR64:$Rd),
673 (v8i8 (Neon_cmpz (v8i8 VPR64:$Rn), (i32 imm:$Imm), CC)))],
676 def _16B : NeonI_2VMisc<0b1, u, 0b00, opcode,
677 (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
678 asmop # "\t$Rd.16b, $Rn.16b, $Imm",
679 [(set (v16i8 VPR128:$Rd),
680 (v16i8 (Neon_cmpz (v16i8 VPR128:$Rn), (i32 imm:$Imm), CC)))],
683 def _4H : NeonI_2VMisc<0b0, u, 0b01, opcode,
684 (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
685 asmop # "\t$Rd.4h, $Rn.4h, $Imm",
686 [(set (v4i16 VPR64:$Rd),
687 (v4i16 (Neon_cmpz (v4i16 VPR64:$Rn), (i32 imm:$Imm), CC)))],
690 def _8H : NeonI_2VMisc<0b1, u, 0b01, opcode,
691 (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
692 asmop # "\t$Rd.8h, $Rn.8h, $Imm",
693 [(set (v8i16 VPR128:$Rd),
694 (v8i16 (Neon_cmpz (v8i16 VPR128:$Rn), (i32 imm:$Imm), CC)))],
697 def _2S : NeonI_2VMisc<0b0, u, 0b10, opcode,
698 (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
699 asmop # "\t$Rd.2s, $Rn.2s, $Imm",
700 [(set (v2i32 VPR64:$Rd),
701 (v2i32 (Neon_cmpz (v2i32 VPR64:$Rn), (i32 imm:$Imm), CC)))],
704 def _4S : NeonI_2VMisc<0b1, u, 0b10, opcode,
705 (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
706 asmop # "\t$Rd.4s, $Rn.4s, $Imm",
707 [(set (v4i32 VPR128:$Rd),
708 (v4i32 (Neon_cmpz (v4i32 VPR128:$Rn), (i32 imm:$Imm), CC)))],
711 def _2D : NeonI_2VMisc<0b1, u, 0b11, opcode,
712 (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
713 asmop # "\t$Rd.2d, $Rn.2d, $Imm",
714 [(set (v2i64 VPR128:$Rd),
715 (v2i64 (Neon_cmpz (v2i64 VPR128:$Rn), (i32 imm:$Imm), CC)))],
// Integer compare-against-zero instructions. Arguments are u, opcode,
// mnemonic and the condition code used in the selection pattern.

// Vector Compare Mask Equal to Zero (Integer)
defm CMEQvvi : NeonI_cmpz_sizes<0b0, 0b01001, "cmeq", SETEQ>;

// Vector Compare Mask Greater Than or Equal to Zero (Signed Integer)
defm CMGEvvi : NeonI_cmpz_sizes<0b1, 0b01000, "cmge", SETGE>;

// Vector Compare Mask Greater Than Zero (Signed Integer)
defm CMGTvvi : NeonI_cmpz_sizes<0b0, 0b01000, "cmgt", SETGT>;

// Vector Compare Mask Less Than or Equal To Zero (Signed Integer)
defm CMLEvvi : NeonI_cmpz_sizes<0b1, 0b01001, "cmle", SETLE>;

// Vector Compare Mask Less Than Zero (Signed Integer)
defm CMLTvvi : NeonI_cmpz_sizes<0b0, 0b01010, "cmlt", SETLT>;
// Floating-point compare-equal: operands are FP vectors, results are the
// integer vector types v2i32 / v4i32 / v2i64.
//
// NOTE(review): the closing brace of this `let ... in {` block is not
// visible in this copy. Also note the defm passes Commutable = 0 while the
// enclosing `let` sets isCommutable = 1 -- confirm which setting takes
// effect on the generated records.
734 // Vector Comparisons (Floating Point)
736 // Vector Compare Mask Equal (Floating Point)
737 let isCommutable =1 in {
738 defm FCMEQvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11100, "fcmeq", Neon_cmeq,
739 Neon_cmeq, Neon_cmeq,
740 v2i32, v4i32, v2i64, 0>;
// Vector Compare Mask Greater Than Or Equal (Floating Point).
// Operands are FP vectors; results are integer vectors of the same shape.
defm FCMGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11100, "fcmge",
                                      Neon_cmge, Neon_cmge, Neon_cmge,
                                      v2i32, v4i32, v2i64,
                                      0>;

// Vector Compare Mask Greater Than (Floating Point).
defm FCMGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11100, "fcmgt",
                                      Neon_cmgt, Neon_cmgt, Neon_cmgt,
                                      v2i32, v4i32, v2i64,
                                      0>;
// Vector Compare Mask Less Than Or Equal (Floating Point).
// FCMLE is an alias for FCMGE with the source operands reversed.
def FCMLEvvv_2S : NeonI_compare_aliases<"fcmle", ".2s", FCMGEvvv_2S, VPR64>;
def FCMLEvvv_4S : NeonI_compare_aliases<"fcmle", ".4s", FCMGEvvv_4S, VPR128>;
def FCMLEvvv_2D : NeonI_compare_aliases<"fcmle", ".2d", FCMGEvvv_2D, VPR128>;

// Vector Compare Mask Less Than (Floating Point).
// FCMLT is an alias for FCMGT with the source operands reversed.
def FCMLTvvv_2S : NeonI_compare_aliases<"fcmlt", ".2s", FCMGTvvv_2S, VPR64>;
def FCMLTvvv_4S : NeonI_compare_aliases<"fcmlt", ".4s", FCMGTvvv_4S, VPR128>;
def FCMLTvvv_2D : NeonI_compare_aliases<"fcmlt", ".2d", FCMGTvvv_2D, VPR128>;
// NeonI_fpcmpz_sizes: floating-point compare-against-zero instructions in the
// _2S, _4S and _2D arrangements. Each variant is a NeonI_2VMisc instruction
// taking one vector register plus an fpz32 immediate, selected from
// Neon_cmpz with condition code CC; results are integer vectors. The size
// field is {size, 0b0} for single precision and {size, 0b1} for _2D.
//
// NOTE(review): the _2D variant compares a v2f64 operand against an f32
// immediate (fpz32 / f32 fpimm) -- confirm that is intended.
// NOTE(review): this copy is missing lines. Each def ends on "]," without a
// terminator and the braces around the multiclass body are not visible.
// Restore from the upstream file before building.
766 multiclass NeonI_fpcmpz_sizes<bit u, bit size, bits<5> opcode,
767 string asmop, CondCode CC>
769 def _2S : NeonI_2VMisc<0b0, u, {size, 0b0}, opcode,
770 (outs VPR64:$Rd), (ins VPR64:$Rn, fpz32:$FPImm),
771 asmop # "\t$Rd.2s, $Rn.2s, $FPImm",
772 [(set (v2i32 VPR64:$Rd),
773 (v2i32 (Neon_cmpz (v2f32 VPR64:$Rn), (f32 fpimm:$FPImm), CC)))],
776 def _4S : NeonI_2VMisc<0b1, u, {size, 0b0}, opcode,
777 (outs VPR128:$Rd), (ins VPR128:$Rn, fpz32:$FPImm),
778 asmop # "\t$Rd.4s, $Rn.4s, $FPImm",
779 [(set (v4i32 VPR128:$Rd),
780 (v4i32 (Neon_cmpz (v4f32 VPR128:$Rn), (f32 fpimm:$FPImm), CC)))],
783 def _2D : NeonI_2VMisc<0b1, u, {size, 0b1}, opcode,
784 (outs VPR128:$Rd), (ins VPR128:$Rn, fpz32:$FPImm),
785 asmop # "\t$Rd.2d, $Rn.2d, $FPImm",
786 [(set (v2i64 VPR128:$Rd),
787 (v2i64 (Neon_cmpz (v2f64 VPR128:$Rn), (f32 fpimm:$FPImm), CC)))],
791 // Vector Compare Mask Equal to Zero (Floating Point)
792 defm FCMEQvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01101, "fcmeq", SETEQ>;
794 // Vector Compare Mask Greater Than or Equal to Zero (Floating Point)
795 defm FCMGEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01100, "fcmge", SETGE>;
797 // Vector Compare Mask Greater Than Zero (Floating Point)
798 defm FCMGTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01100, "fcmgt", SETGT>;
800 // Vector Compare Mask Less Than or Equal To Zero (Floating Point)
801 defm FCMLEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01101, "fcmle", SETLE>;
803 // Vector Compare Mask Less Than Zero (Floating Point)
804 defm FCMLTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01110, "fcmlt", SETLT>;
806 // Vector Absolute Comparisons (Floating Point)
808 // Vector Absolute Compare Mask Greater Than Or Equal (Floating Point)
809 defm FACGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11101, "facge",
810 int_arm_neon_vacged, int_arm_neon_vacgeq,
811 int_aarch64_neon_vacgeq,
812 v2i32, v4i32, v2i64, 0>;
814 // Vector Absolute Compare Mask Greater Than (Floating Point)
815 defm FACGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11101, "facgt",
816 int_arm_neon_vacgtd, int_arm_neon_vacgtq,
817 int_aarch64_neon_vacgtq,
818 v2i32, v4i32, v2i64, 0>;
820 // Vector Absolute Compare Mask Less Than Or Equal (Floating Point)
821 // FACLE is alias for FACGE with operands reversed.
822 def FACLEvvv_2S : NeonI_compare_aliases<"facle", ".2s", FACGEvvv_2S, VPR64>;
823 def FACLEvvv_4S : NeonI_compare_aliases<"facle", ".4s", FACGEvvv_4S, VPR128>;
824 def FACLEvvv_2D : NeonI_compare_aliases<"facle", ".2d", FACGEvvv_2D, VPR128>;
826 // Vector Absolute Compare Mask Less Than (Floating Point)
827 // FACLT is alias for FACGT with operands reversed.
828 def FACLTvvv_2S : NeonI_compare_aliases<"faclt", ".2s", FACGTvvv_2S, VPR64>;
829 def FACLTvvv_4S : NeonI_compare_aliases<"faclt", ".4s", FACGTvvv_4S, VPR128>;
830 def FACLTvvv_2D : NeonI_compare_aliases<"faclt", ".2d", FACGTvvv_2D, VPR128>;
832 // Vector halving add (Integer Signed, Unsigned)
833 defm SHADDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00000, "shadd",
834 int_arm_neon_vhadds, 1>;
835 defm UHADDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00000, "uhadd",
836 int_arm_neon_vhaddu, 1>;
838 // Vector halving sub (Integer Signed, Unsigned)
839 defm SHSUBvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00100, "shsub",
840 int_arm_neon_vhsubs, 0>;
841 defm UHSUBvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00100, "uhsub",
842 int_arm_neon_vhsubu, 0>;
844 // Vector rounding halving add (Integer Signed, Unsigned)
845 defm SRHADDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00010, "srhadd",
846 int_arm_neon_vrhadds, 1>;
847 defm URHADDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00010, "urhadd",
848 int_arm_neon_vrhaddu, 1>;
850 // Vector Saturating add (Integer Signed, Unsigned)
851 defm SQADDvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00001, "sqadd",
852 int_arm_neon_vqadds, 1>;
853 defm UQADDvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00001, "uqadd",
854 int_arm_neon_vqaddu, 1>;
856 // Vector Saturating sub (Integer Signed, Unsigned)
// Subtraction depends on operand order, so the trailing commutable flag is 0,
// in line with the halving-subtract definitions (SHSUBvvv/UHSUBvvv). Leaving
// it at 1 would permit the sources to be swapped, changing the result.
857 defm SQSUBvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00101, "sqsub",
858 int_arm_neon_vqsubs, 0>;
859 defm UQSUBvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00101, "uqsub",
860 int_arm_neon_vqsubu, 0>;
// Register-controlled vector shifts. One source supplies the data and the
// other the per-lane shift amount, so the operands are not interchangeable:
// the trailing commutable flag is 0 for every definition in this group.
862 // Vector Shift Left (Signed and Unsigned Integer)
863 defm SSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01000, "sshl",
864 int_arm_neon_vshifts, 0>;
865 defm USHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01000, "ushl",
866 int_arm_neon_vshiftu, 0>;
868 // Vector Saturating Shift Left (Signed and Unsigned Integer)
869 defm SQSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01001, "sqshl",
870 int_arm_neon_vqshifts, 0>;
871 defm UQSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01001, "uqshl",
872 int_arm_neon_vqshiftu, 0>;
874 // Vector Rounding Shift Left (Signed and Unsigned Integer)
875 defm SRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01010, "srshl",
876 int_arm_neon_vrshifts, 0>;
877 defm URSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01010, "urshl",
878 int_arm_neon_vrshiftu, 0>;
880 // Vector Saturating Rounding Shift Left (Signed and Unsigned Integer)
881 defm SQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01011, "sqrshl",
882 int_arm_neon_vqrshifts, 0>;
883 defm UQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01011, "uqrshl",
884 int_arm_neon_vqrshiftu, 0>;
886 // Vector Maximum (Signed and Unsigned Integer)
887 defm SMAXvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01100, "smax", int_arm_neon_vmaxs, 1>;
888 defm UMAXvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01100, "umax", int_arm_neon_vmaxu, 1>;
890 // Vector Minimum (Signed and Unsigned Integer)
891 defm SMINvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01101, "smin", int_arm_neon_vmins, 1>;
892 defm UMINvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01101, "umin", int_arm_neon_vminu, 1>;
894 // Vector Maximum (Floating Point)
895 defm FMAXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11110, "fmax",
896 int_arm_neon_vmaxs, int_arm_neon_vmaxs,
897 int_arm_neon_vmaxs, v2f32, v4f32, v2f64, 1>;
899 // Vector Minimum (Floating Point)
900 defm FMINvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11110, "fmin",
901 int_arm_neon_vmins, int_arm_neon_vmins,
902 int_arm_neon_vmins, v2f32, v4f32, v2f64, 1>;
904 // Vector maxNum (Floating Point) - prefers a number over a quiet NaN
905 defm FMAXNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11000, "fmaxnm",
906 int_aarch64_neon_vmaxnm,
907 int_aarch64_neon_vmaxnm,
908 int_aarch64_neon_vmaxnm,
909 v2f32, v4f32, v2f64, 1>;
911 // Vector minNum (Floating Point) - prefers a number over a quiet NaN
912 defm FMINNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11000, "fminnm",
913 int_aarch64_neon_vminnm,
914 int_aarch64_neon_vminnm,
915 int_aarch64_neon_vminnm,
916 v2f32, v4f32, v2f64, 1>;
// Pairwise max/min operations. The result is formed from adjacent-lane pairs
// of the first source followed by those of the second, so exchanging the
// sources reorders the result lanes. These definitions therefore pass 0 as
// the trailing commutable flag.
918 // Vector Maximum Pairwise (Signed and Unsigned Integer)
919 defm SMAXPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10100, "smaxp", int_arm_neon_vpmaxs, 0>;
920 defm UMAXPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10100, "umaxp", int_arm_neon_vpmaxu, 0>;
922 // Vector Minimum Pairwise (Signed and Unsigned Integer)
923 defm SMINPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10101, "sminp", int_arm_neon_vpmins, 0>;
924 defm UMINPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10101, "uminp", int_arm_neon_vpminu, 0>;
926 // Vector Maximum Pairwise (Floating Point)
927 defm FMAXPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11110, "fmaxp",
928 int_arm_neon_vpmaxs, int_arm_neon_vpmaxs,
929 int_arm_neon_vpmaxs, v2f32, v4f32, v2f64, 0>;
931 // Vector Minimum Pairwise (Floating Point)
932 defm FMINPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11110, "fminp",
933 int_arm_neon_vpmins, int_arm_neon_vpmins,
934 int_arm_neon_vpmins, v2f32, v4f32, v2f64, 0>;
936 // Vector maxNum Pairwise (Floating Point) - prefers a number over a quiet NaN
937 defm FMAXNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11000, "fmaxnmp",
938 int_aarch64_neon_vpmaxnm,
939 int_aarch64_neon_vpmaxnm,
940 int_aarch64_neon_vpmaxnm,
941 v2f32, v4f32, v2f64, 0>;
943 // Vector minNum Pairwise (Floating Point) - prefers a number over a quiet NaN
944 defm FMINNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11000, "fminnmp",
945 int_aarch64_neon_vpminnm,
946 int_aarch64_neon_vpminnm,
947 int_aarch64_neon_vpminnm,
948 v2f32, v4f32, v2f64, 0>;
// Pairwise additions place the sums from the first source ahead of the sums
// from the second, so swapping the sources changes the result layout; the
// trailing commutable flag is 0 for both definitions.
950 // Vector Addition Pairwise (Integer)
951 defm ADDP : NeonI_3VSame_BHSD_sizes<0b0, 0b10111, "addp", int_arm_neon_vpadd, 0>;
953 // Vector Addition Pairwise (Floating Point)
954 defm FADDP : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11010, "faddp",
958 v2f32, v4f32, v2f64, 0>;
960 // Vector Saturating Doubling Multiply High
961 defm SQDMULHvvv : NeonI_3VSame_HS_sizes<0b0, 0b10110, "sqdmulh",
962 int_arm_neon_vqdmulh, 1>;
964 // Vector Saturating Rounding Doubling Multiply High
965 defm SQRDMULHvvv : NeonI_3VSame_HS_sizes<0b1, 0b10110, "sqrdmulh",
966 int_arm_neon_vqrdmulh, 1>;
968 // Vector Multiply Extended (Floating Point)
969 defm FMULXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11011, "fmulx",
970 int_aarch64_neon_vmulx,
971 int_aarch64_neon_vmulx,
972 int_aarch64_neon_vmulx,
973 v2f32, v4f32, v2f64, 1>;
975 // Vector Immediate Instructions
977 multiclass neon_mov_imm_shift_asmoperands<string PREFIX>
979 def _asmoperand : AsmOperandClass
981 let Name = "NeonMovImmShift" # PREFIX;
982 let RenderMethod = "addNeonMovImmShift" # PREFIX # "Operands";
983 let PredicateMethod = "isNeonMovImmShift" # PREFIX;
987 // Definition of vector immediates shift operands
989 // The selectable use-cases extract the shift operation
990 // information from the OpCmode fields encoded in the immediate.
// Node transform: takes the OpCmode constant, decodes it with
// A64Imms::decodeNeonModShiftImm and yields the decoded shift amount as an
// i32 target constant.
991 def neon_mod_shift_imm_XFORM : SDNodeXForm<imm, [{
992 uint64_t OpCmode = N->getZExtValue();
994 unsigned ShiftOnesIn;
996 A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn);
// Nothing to transform when the decoded immediate carries no shift.
997 if (!HasShift) return SDValue();
998 return CurDAG->getTargetConstant(ShiftImm, MVT::i32);
1001 // Vector immediates shift operands which accept LSL and MSL
1002 // shift operators with shift value in the range of 0, 8, 16, 24 (LSL),
1003 // or 0, 8 (LSLH) or 8, 16 (MSL).
1004 defm neon_mov_imm_LSL : neon_mov_imm_shift_asmoperands<"LSL">;
1005 defm neon_mov_imm_MSL : neon_mov_imm_shift_asmoperands<"MSL">;
1006 // LSLH restricts shift amount to 0, 8 out of 0, 8, 16, 24
1007 defm neon_mov_imm_LSLH : neon_mov_imm_shift_asmoperands<"LSLH">;
1009 multiclass neon_mov_imm_shift_operands<string PREFIX,
1010 string HALF, string ISHALF, code pred>
1012 def _operand : Operand<i32>, ImmLeaf<i32, pred, neon_mod_shift_imm_XFORM>
1015 "printNeonMovImmShiftOperand<A64SE::" # PREFIX # ", " # ISHALF # ">";
1017 "DecodeNeonMovImmShiftOperand<A64SE::" # PREFIX # ", " # ISHALF # ">";
1018 let ParserMatchClass =
1019 !cast<AsmOperandClass>("neon_mov_imm_" # PREFIX # HALF # "_asmoperand");
1023 defm neon_mov_imm_LSL : neon_mov_imm_shift_operands<"LSL", "", "false", [{
1025 unsigned ShiftOnesIn;
1027 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1028 return (HasShift && !ShiftOnesIn);
1031 defm neon_mov_imm_MSL : neon_mov_imm_shift_operands<"MSL", "", "false", [{
1033 unsigned ShiftOnesIn;
1035 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1036 return (HasShift && ShiftOnesIn);
1039 defm neon_mov_imm_LSLH : neon_mov_imm_shift_operands<"LSL", "H", "true", [{
1041 unsigned ShiftOnesIn;
1043 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1044 return (HasShift && !ShiftOnesIn);
1047 def neon_uimm1_asmoperand : AsmOperandClass
1050 let PredicateMethod = "isUImm<1>";
1051 let RenderMethod = "addImmOperands";
1054 def neon_uimm2_asmoperand : AsmOperandClass
1057 let PredicateMethod = "isUImm<2>";
1058 let RenderMethod = "addImmOperands";
1061 def neon_uimm8_asmoperand : AsmOperandClass
1064 let PredicateMethod = "isUImm<8>";
1065 let RenderMethod = "addImmOperands";
1068 def neon_uimm8 : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
1069 let ParserMatchClass = neon_uimm8_asmoperand;
1070 let PrintMethod = "printNeonUImm8Operand";
1073 def neon_uimm64_mask_asmoperand : AsmOperandClass
1075 let Name = "NeonUImm64Mask";
1076 let PredicateMethod = "isNeonUImm64Mask";
1077 let RenderMethod = "addNeonUImm64MaskOperands";
1080 // MCOperand for 64-bit bytemask with each byte having only the
1081 // value 0x00 and 0xff is encoded as an unsigned 8-bit value
1082 def neon_uimm64_mask : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
1083 let ParserMatchClass = neon_uimm64_mask_asmoperand;
1084 let PrintMethod = "printNeonUImm64MaskOperand";
1087 multiclass NeonI_mov_imm_lsl_sizes<string asmop, bit op,
1088 SDPatternOperator opnode>
1090 // shift zeros, per word
1091 def _2S : NeonI_1VModImm<0b0, op,
1093 (ins neon_uimm8:$Imm,
1094 neon_mov_imm_LSL_operand:$Simm),
1095 !strconcat(asmop, " $Rd.2s, $Imm$Simm"),
1096 [(set (v2i32 VPR64:$Rd),
1097 (v2i32 (opnode (timm:$Imm),
1098 (neon_mov_imm_LSL_operand:$Simm))))],
1101 let cmode = {0b0, Simm{1}, Simm{0}, 0b0};
1104 def _4S : NeonI_1VModImm<0b1, op,
1106 (ins neon_uimm8:$Imm,
1107 neon_mov_imm_LSL_operand:$Simm),
1108 !strconcat(asmop, " $Rd.4s, $Imm$Simm"),
1109 [(set (v4i32 VPR128:$Rd),
1110 (v4i32 (opnode (timm:$Imm),
1111 (neon_mov_imm_LSL_operand:$Simm))))],
1114 let cmode = {0b0, Simm{1}, Simm{0}, 0b0};
1117 // shift zeros, per halfword
1118 def _4H : NeonI_1VModImm<0b0, op,
1120 (ins neon_uimm8:$Imm,
1121 neon_mov_imm_LSLH_operand:$Simm),
1122 !strconcat(asmop, " $Rd.4h, $Imm$Simm"),
1123 [(set (v4i16 VPR64:$Rd),
1124 (v4i16 (opnode (timm:$Imm),
1125 (neon_mov_imm_LSLH_operand:$Simm))))],
1128 let cmode = {0b1, 0b0, Simm, 0b0};
1131 def _8H : NeonI_1VModImm<0b1, op,
1133 (ins neon_uimm8:$Imm,
1134 neon_mov_imm_LSLH_operand:$Simm),
1135 !strconcat(asmop, " $Rd.8h, $Imm$Simm"),
1136 [(set (v8i16 VPR128:$Rd),
1137 (v8i16 (opnode (timm:$Imm),
1138 (neon_mov_imm_LSLH_operand:$Simm))))],
1141 let cmode = {0b1, 0b0, Simm, 0b0};
// Defines the .2s/.4s/.4h/.8h forms of a read-modify-write "op with shifted
// immediate" instruction (the destination is tied to $src).
//   asmop      - assembly mnemonic.
//   op         - encoding bit forwarded to NeonI_1VModImm.
//   opnode     - bitwise operator applied to $src and the immediate vector.
//   neonopnode - node that materialises the immediate vector from
//                ($Imm, $Simm).
// The halfword forms use neon_mov_imm_LSLH_operand both in the operand list
// and in the selection pattern, so the two stay in agreement (as they do in
// NeonI_mov_imm_lsl_sizes).
1145 multiclass NeonI_mov_imm_with_constraint_lsl_sizes<string asmop, bit op,
1146 SDPatternOperator opnode,
1147 SDPatternOperator neonopnode>
1149 let Constraints = "$src = $Rd" in {
1150 // shift zeros, per word
1151 def _2S : NeonI_1VModImm<0b0, op,
1153 (ins VPR64:$src, neon_uimm8:$Imm,
1154 neon_mov_imm_LSL_operand:$Simm),
1155 !strconcat(asmop, " $Rd.2s, $Imm$Simm"),
1156 [(set (v2i32 VPR64:$Rd),
1157 (v2i32 (opnode (v2i32 VPR64:$src),
1158 (v2i32 (bitconvert (v2i32 (neonopnode timm:$Imm,
1159 neon_mov_imm_LSL_operand:$Simm)))))))],
1162 let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
1165 def _4S : NeonI_1VModImm<0b1, op,
1167 (ins VPR128:$src, neon_uimm8:$Imm,
1168 neon_mov_imm_LSL_operand:$Simm),
1169 !strconcat(asmop, " $Rd.4s, $Imm$Simm"),
1170 [(set (v4i32 VPR128:$Rd),
1171 (v4i32 (opnode (v4i32 VPR128:$src),
1172 (v4i32 (bitconvert (v4i32 (neonopnode timm:$Imm,
1173 neon_mov_imm_LSL_operand:$Simm)))))))],
1176 let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
1179 // shift zeros, per halfword
1180 def _4H : NeonI_1VModImm<0b0, op,
1182 (ins VPR64:$src, neon_uimm8:$Imm,
1183 neon_mov_imm_LSLH_operand:$Simm),
1184 !strconcat(asmop, " $Rd.4h, $Imm$Simm"),
1185 [(set (v4i16 VPR64:$Rd),
1186 (v4i16 (opnode (v4i16 VPR64:$src),
1187 (v4i16 (bitconvert (v4i16 (neonopnode timm:$Imm,
1188 neon_mov_imm_LSLH_operand:$Simm)))))))],
1191 let cmode = {0b1, 0b0, Simm, 0b1};
1194 def _8H : NeonI_1VModImm<0b1, op,
1196 (ins VPR128:$src, neon_uimm8:$Imm,
1197 neon_mov_imm_LSLH_operand:$Simm),
1198 !strconcat(asmop, " $Rd.8h, $Imm$Simm"),
1199 [(set (v8i16 VPR128:$Rd),
1200 (v8i16 (opnode (v8i16 VPR128:$src),
1201 (v8i16 (bitconvert (v8i16 (neonopnode timm:$Imm,
1202 neon_mov_imm_LSLH_operand:$Simm)))))))],
1205 let cmode = {0b1, 0b0, Simm, 0b1};
1210 multiclass NeonI_mov_imm_msl_sizes<string asmop, bit op,
1211 SDPatternOperator opnode>
1213 // shift ones, per word
1214 def _2S : NeonI_1VModImm<0b0, op,
1216 (ins neon_uimm8:$Imm,
1217 neon_mov_imm_MSL_operand:$Simm),
1218 !strconcat(asmop, " $Rd.2s, $Imm$Simm"),
1219 [(set (v2i32 VPR64:$Rd),
1220 (v2i32 (opnode (timm:$Imm),
1221 (neon_mov_imm_MSL_operand:$Simm))))],
1224 let cmode = {0b1, 0b1, 0b0, Simm};
1227 def _4S : NeonI_1VModImm<0b1, op,
1229 (ins neon_uimm8:$Imm,
1230 neon_mov_imm_MSL_operand:$Simm),
1231 !strconcat(asmop, " $Rd.4s, $Imm$Simm"),
1232 [(set (v4i32 VPR128:$Rd),
1233 (v4i32 (opnode (timm:$Imm),
1234 (neon_mov_imm_MSL_operand:$Simm))))],
1237 let cmode = {0b1, 0b1, 0b0, Simm};
1241 // Vector Move Immediate Shifted
1242 let isReMaterializable = 1 in {
1243 defm MOVIvi_lsl : NeonI_mov_imm_lsl_sizes<"movi", 0b0, Neon_movi>;
1246 // Vector Move Inverted Immediate Shifted
1247 let isReMaterializable = 1 in {
1248 defm MVNIvi_lsl : NeonI_mov_imm_lsl_sizes<"mvni", 0b1, Neon_mvni>;
1251 // Vector Bitwise Bit Clear (AND NOT) - immediate
1252 let isReMaterializable = 1 in {
1253 defm BICvi_lsl : NeonI_mov_imm_with_constraint_lsl_sizes<"bic", 0b1,
1257 // Vector Bitwise OR - immediate
1259 let isReMaterializable = 1 in {
1260 defm ORRvi_lsl : NeonI_mov_imm_with_constraint_lsl_sizes<"orr", 0b0,
1264 // Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immediate
1265 // LowerBUILD_VECTOR favors lowering MOVI over MVNI.
1266 // BIC immediate instructions selection requires additional patterns to
1267 // transform Neon_movi operands into BIC immediate operands
// Node transform used when rewriting an AND with a Neon_movi halfword
// immediate into BIC: decodes the OpCmode constant and returns the logical
// negation of the decoded shift value as an i32 target constant.
1269 def neon_mov_imm_LSLH_transform_XFORM : SDNodeXForm<imm, [{
1270 uint64_t OpCmode = N->getZExtValue();
1272 unsigned ShiftOnesIn;
// The return value of the decode call is deliberately discarded here.
1273 (void)A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn);
1274 // LSLH restricts shift amount to 0, 8 which are encoded as 0 and 1
1275 // Transform encoded shift amount 0 to 1 and 1 to 0.
1276 return CurDAG->getTargetConstant(!ShiftImm, MVT::i32);
1279 def neon_mov_imm_LSLH_transform_operand
1282 unsigned ShiftOnesIn;
1284 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1285 return (HasShift && !ShiftOnesIn); }],
1286 neon_mov_imm_LSLH_transform_XFORM>;
// AND with a halfword mask that keeps one byte equals clearing the other
// byte, i.e. BIC with immediate 0xff at the opposite shift. The BIC immediate
// must be 255; an immediate of 0 would clear no bits at all.
1288 // Transform (and A, (4h Neon_movi 0xff)) -> BIC 4h (A, 0xff, LSL 8)
1289 // Transform (and A, (4h Neon_movi 0xff LSL #8)) -> BIC 4h (A, 0xff)
1290 def : Pat<(v4i16 (and VPR64:$src,
1291 (v4i16 (Neon_movi 255, neon_mov_imm_LSLH_transform_operand:$Simm)))),
1292 (BICvi_lsl_4H VPR64:$src, 255,
1293 neon_mov_imm_LSLH_transform_operand:$Simm)>;
// 128-bit counterpart of the halfword AND -> BIC rewrite. The mask byte that
// is NOT kept is the one to clear, so the BIC immediate is 255 (an immediate
// of 0 would leave the source unchanged).
1295 // Transform (and A, (8h Neon_movi 0xff)) -> BIC 8h (A, 0xff, LSL 8)
1296 // Transform (and A, (8h Neon_movi 0xff LSL #8)) -> BIC 8h (A, 0xff)
1297 def : Pat<(v8i16 (and VPR128:$src,
1298 (v8i16 (Neon_movi 255, neon_mov_imm_LSLH_transform_operand:$Simm)))),
1299 (BICvi_lsl_8H VPR128:$src, 255,
1300 neon_mov_imm_LSLH_transform_operand:$Simm)>;
// Extra selection patterns for the halfword (4H/8H) immediate instructions:
// they match opnode applied to a source of another vector type (v8i8, v1i64,
// v16i8, v4i32, v2i64) whose second operand is a bitconvert of the
// v4i16/v8i16 immediate vector.
//   opnode     - bitwise operator being matched.
//   neonopnode - node that materialises the immediate vector.
//   INST8H     - instruction emitted for the VPR128 patterns; the VPR64
//                patterns emit INST4H.
1303 multiclass Neon_bitwiseVi_patterns<SDPatternOperator opnode,
1304 SDPatternOperator neonopnode,
1306 Instruction INST8H> {
// 64-bit sources.
1307 def : Pat<(v8i8 (opnode VPR64:$src,
1308 (bitconvert(v4i16 (neonopnode timm:$Imm,
1309 neon_mov_imm_LSLH_operand:$Simm))))),
1310 (INST4H VPR64:$src, neon_uimm8:$Imm,
1311 neon_mov_imm_LSLH_operand:$Simm)>;
1312 def : Pat<(v1i64 (opnode VPR64:$src,
1313 (bitconvert(v4i16 (neonopnode timm:$Imm,
1314 neon_mov_imm_LSLH_operand:$Simm))))),
1315 (INST4H VPR64:$src, neon_uimm8:$Imm,
1316 neon_mov_imm_LSLH_operand:$Simm)>;
// 128-bit sources.
1318 def : Pat<(v16i8 (opnode VPR128:$src,
1319 (bitconvert(v8i16 (neonopnode timm:$Imm,
1320 neon_mov_imm_LSLH_operand:$Simm))))),
1321 (INST8H VPR128:$src, neon_uimm8:$Imm,
1322 neon_mov_imm_LSLH_operand:$Simm)>;
1323 def : Pat<(v4i32 (opnode VPR128:$src,
1324 (bitconvert(v8i16 (neonopnode timm:$Imm,
1325 neon_mov_imm_LSLH_operand:$Simm))))),
1326 (INST8H VPR128:$src, neon_uimm8:$Imm,
1327 neon_mov_imm_LSLH_operand:$Simm)>;
1328 def : Pat<(v2i64 (opnode VPR128:$src,
1329 (bitconvert(v8i16 (neonopnode timm:$Imm,
1330 neon_mov_imm_LSLH_operand:$Simm))))),
1331 (INST8H VPR128:$src, neon_uimm8:$Imm,
1332 neon_mov_imm_LSLH_operand:$Simm)>;
1335 // Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immediate
// BIC computes src AND NOT(imm); Neon_mvni already yields the inverted
// immediate, so the operator to match is 'and'. Matching 'or' here would
// select BIC for an OR with an inverted immediate and produce wrong results.
1336 defm : Neon_bitwiseVi_patterns<and, Neon_mvni, BICvi_lsl_4H, BICvi_lsl_8H>;
1338 // Additional patterns for Vector Bitwise OR - immediate
1339 defm : Neon_bitwiseVi_patterns<or, Neon_movi, ORRvi_lsl_4H, ORRvi_lsl_8H>;
1342 // Vector Move Immediate Masked
1343 let isReMaterializable = 1 in {
1344 defm MOVIvi_msl : NeonI_mov_imm_msl_sizes<"movi", 0b0, Neon_movi>;
1347 // Vector Move Inverted Immediate Masked
1348 let isReMaterializable = 1 in {
1349 defm MVNIvi_msl : NeonI_mov_imm_msl_sizes<"mvni", 0b1, Neon_mvni>;
// Assembly alias for the shifted-immediate instructions written without a
// shift: "<asmop> $Rd<asmlane>, $Imm" maps to inst with the shift operand
// set to 0.
//   asmop   - mnemonic.
//   asmlane - lane-arrangement suffix appended to $Rd (e.g. ".2s").
//   inst    - instruction the alias expands to.
//   VPRC    - register operand class of $Rd.
1352 class NeonI_mov_imm_lsl_aliases<string asmop, string asmlane,
1353 Instruction inst, RegisterOperand VPRC>
1354 : NeonInstAlias<!strconcat(asmop, " $Rd," # asmlane # ", $Imm"),
1355 (inst VPRC:$Rd, neon_uimm8:$Imm, 0), 0b0>;
1357 // Aliases for Vector Move Immediate Shifted
1358 def : NeonI_mov_imm_lsl_aliases<"movi", ".2s", MOVIvi_lsl_2S, VPR64>;
1359 def : NeonI_mov_imm_lsl_aliases<"movi", ".4s", MOVIvi_lsl_4S, VPR128>;
1360 def : NeonI_mov_imm_lsl_aliases<"movi", ".4h", MOVIvi_lsl_4H, VPR64>;
1361 def : NeonI_mov_imm_lsl_aliases<"movi", ".8h", MOVIvi_lsl_8H, VPR128>;
1363 // Aliases for Vector Move Inverted Immediate Shifted
1364 def : NeonI_mov_imm_lsl_aliases<"mvni", ".2s", MVNIvi_lsl_2S, VPR64>;
1365 def : NeonI_mov_imm_lsl_aliases<"mvni", ".4s", MVNIvi_lsl_4S, VPR128>;
1366 def : NeonI_mov_imm_lsl_aliases<"mvni", ".4h", MVNIvi_lsl_4H, VPR64>;
1367 def : NeonI_mov_imm_lsl_aliases<"mvni", ".8h", MVNIvi_lsl_8H, VPR128>;
1369 // Aliases for Vector Bitwise Bit Clear (AND NOT) - immediate
1370 def : NeonI_mov_imm_lsl_aliases<"bic", ".2s", BICvi_lsl_2S, VPR64>;
1371 def : NeonI_mov_imm_lsl_aliases<"bic", ".4s", BICvi_lsl_4S, VPR128>;
1372 def : NeonI_mov_imm_lsl_aliases<"bic", ".4h", BICvi_lsl_4H, VPR64>;
1373 def : NeonI_mov_imm_lsl_aliases<"bic", ".8h", BICvi_lsl_8H, VPR128>;
1375 // Aliases for Vector Bitwise OR - immediate
1376 def : NeonI_mov_imm_lsl_aliases<"orr", ".2s", ORRvi_lsl_2S, VPR64>;
1377 def : NeonI_mov_imm_lsl_aliases<"orr", ".4s", ORRvi_lsl_4S, VPR128>;
1378 def : NeonI_mov_imm_lsl_aliases<"orr", ".4h", ORRvi_lsl_4H, VPR64>;
1379 def : NeonI_mov_imm_lsl_aliases<"orr", ".8h", ORRvi_lsl_8H, VPR128>;
1381 // Vector Move Immediate - per byte
1382 let isReMaterializable = 1 in {
1383 def MOVIvi_8B : NeonI_1VModImm<0b0, 0b0,
1384 (outs VPR64:$Rd), (ins neon_uimm8:$Imm),
1385 "movi\t$Rd.8b, $Imm",
1386 [(set (v8i8 VPR64:$Rd),
1387 (v8i8 (Neon_movi (timm:$Imm), (i32 imm))))],
1392 def MOVIvi_16B : NeonI_1VModImm<0b1, 0b0,
1393 (outs VPR128:$Rd), (ins neon_uimm8:$Imm),
1394 "movi\t$Rd.16b, $Imm",
1395 [(set (v16i8 VPR128:$Rd),
1396 (v16i8 (Neon_movi (timm:$Imm), (i32 imm))))],
1402 // Vector Move Immediate - bytemask, per double word
1403 let isReMaterializable = 1 in {
1404 def MOVIvi_2D : NeonI_1VModImm<0b1, 0b1,
1405 (outs VPR128:$Rd), (ins neon_uimm64_mask:$Imm),
1406 "movi\t $Rd.2d, $Imm",
1407 [(set (v2i64 VPR128:$Rd),
1408 (v2i64 (Neon_movi (timm:$Imm), (i32 imm))))],
1414 // Vector Move Immediate - bytemask, one doubleword
1416 let isReMaterializable = 1 in {
1417 def MOVIdi : NeonI_1VModImm<0b0, 0b1,
1418 (outs FPR64:$Rd), (ins neon_uimm64_mask:$Imm),
1420 [(set (f64 FPR64:$Rd),
1422 (v1i64 (Neon_movi (timm:$Imm), (i32 imm))))))],
1428 // Vector Floating Point Move Immediate
1430 class NeonI_FMOV_impl<string asmlane, RegisterOperand VPRC, ValueType OpTy,
1431 Operand immOpType, bit q, bit op>
1432 : NeonI_1VModImm<q, op,
1433 (outs VPRC:$Rd), (ins immOpType:$Imm),
1434 "fmov\t$Rd" # asmlane # ", $Imm",
1435 [(set (OpTy VPRC:$Rd),
1436 (OpTy (Neon_fmovi (timm:$Imm))))],
1441 let isReMaterializable = 1 in {
1442 def FMOVvi_2S : NeonI_FMOV_impl<".2s", VPR64, v2f32, fmov32_operand, 0b0, 0b0>;
1443 def FMOVvi_4S : NeonI_FMOV_impl<".4s", VPR128, v4f32, fmov32_operand, 0b1, 0b0>;
1444 def FMOVvi_2D : NeonI_FMOV_impl<".2d", VPR128, v2f64, fmov64_operand, 0b1, 0b1>;
1447 // Vector Shift (Immediate)
1448 // Immediate in [0, 63]
1449 def imm0_63 : Operand<i32> {
1450 let ParserMatchClass = uimm6_asmoperand;
1453 // Shift Right Immediate - A shift right immediate is encoded differently from
1454 // other shift immediates. The immh:immb field is encoded like so:
1457 // 8 immh:immb<6:3> = '0001xxx', <imm> is encoded in immh:immb<2:0>
1458 // 16 immh:immb<6:4> = '001xxxx', <imm> is encoded in immh:immb<3:0>
1459 // 32 immh:immb<6:5> = '01xxxxx', <imm> is encoded in immh:immb<4:0>
1460 // 64 immh:immb<6> = '1xxxxxx', <imm> is encoded in immh:immb<5:0>
// Assembler operand class for a shift-right immediate. OFFSET is a string
// suffix ("8", "16", "32", "64" in the definitions that follow) appended to
// "ShrImm" to form both the class name and the diagnostic type.
1461 class shr_imm_asmoperands<string OFFSET> : AsmOperandClass {
1462 let Name = "ShrImm" # OFFSET;
1463 let RenderMethod = "addImmOperands";
1464 let DiagnosticType = "ShrImm" # OFFSET;
// i32 operand for a shift-right immediate. OFFSET selects the encoder
// ("getShiftRightImm<OFFSET>") and decoder ("DecodeShiftRightImm<OFFSET>")
// methods and the matching "shr_imm<OFFSET>_asmoperand" parser class, which
// is looked up by name and so must already be defined.
1467 class shr_imm<string OFFSET> : Operand<i32> {
1468 let EncoderMethod = "getShiftRightImm" # OFFSET;
1469 let DecoderMethod = "DecodeShiftRightImm" # OFFSET;
1470 let ParserMatchClass =
1471 !cast<AsmOperandClass>("shr_imm" # OFFSET # "_asmoperand");
1474 def shr_imm8_asmoperand : shr_imm_asmoperands<"8">;
1475 def shr_imm16_asmoperand : shr_imm_asmoperands<"16">;
1476 def shr_imm32_asmoperand : shr_imm_asmoperands<"32">;
1477 def shr_imm64_asmoperand : shr_imm_asmoperands<"64">;
1479 def shr_imm8 : shr_imm<"8">;
1480 def shr_imm16 : shr_imm<"16">;
1481 def shr_imm32 : shr_imm<"32">;
1482 def shr_imm64 : shr_imm<"64">;
// Vector shift by immediate, same source and destination arrangement.
//   q, u, opcode - encoding fields forwarded to NeonI_2VShiftImm.
//   asmop, T     - mnemonic and lane-arrangement suffix used in the asm string.
//   VPRC, Ty     - register class and value type of both $Rd and $Rn.
//   ImmTy        - operand type of the shift immediate.
//   OpNode       - shift node; the pattern applies it to $Rn and a Neon_vdup
//                  of the i32 immediate.
1484 class N2VShift<bit q, bit u, bits<5> opcode, string asmop, string T,
1485 RegisterOperand VPRC, ValueType Ty, Operand ImmTy, SDNode OpNode>
1486 : NeonI_2VShiftImm<q, u, opcode,
1487 (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
1488 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1489 [(set (Ty VPRC:$Rd),
1490 (Ty (OpNode (Ty VPRC:$Rn),
1491 (Ty (Neon_vdup (i32 imm:$Imm))))))],
1494 multiclass NeonI_N2VShL<bit u, bits<5> opcode, string asmop> {
1495 // 64-bit vector types.
1496 def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3, shl> {
1497 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
1500 def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4, shl> {
1501 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
1504 def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5, shl> {
1505 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
1508 // 128-bit vector types.
1509 def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3, shl> {
1510 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
1513 def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4, shl> {
1514 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
1517 def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5, shl> {
1518 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
1521 def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63, shl> {
1522 let Inst{22} = 0b1; // immh:immb = 1xxxxxx
1526 multiclass NeonI_N2VShR<bit u, bits<5> opcode, string asmop, SDNode OpNode> {
1527 def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1529 let Inst{22-19} = 0b0001;
1532 def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1534 let Inst{22-20} = 0b001;
1537 def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1539 let Inst{22-21} = 0b01;
1542 def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1544 let Inst{22-19} = 0b0001;
1547 def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1549 let Inst{22-20} = 0b001;
1552 def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1554 let Inst{22-21} = 0b01;
1557 def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1564 defm SHLvvi : NeonI_N2VShL<0b0, 0b01010, "shl">;
1567 defm SSHRvvi : NeonI_N2VShR<0b0, 0b00000, "sshr", sra>;
1568 defm USHRvvi : NeonI_N2VShR<0b1, 0b00000, "ushr", srl>;
// Pattern fragments selecting the upper half of a 128-bit vector: each one is
// an extract_subvector starting at the lane index equal to half the lane
// count (8 of 16 bytes, 4 of 8 halfwords, 2 of 4 words).
1570 def Neon_High16B : PatFrag<(ops node:$in),
1571 (extract_subvector (v16i8 node:$in), (iPTR 8))>;
1572 def Neon_High8H : PatFrag<(ops node:$in),
1573 (extract_subvector (v8i16 node:$in), (iPTR 4))>;
1574 def Neon_High4S : PatFrag<(ops node:$in),
1575 (extract_subvector (v4i32 node:$in), (iPTR 2))>;
1577 def Neon_low8H : PatFrag<(ops node:$in),
1578 (v4i16 (extract_subvector (v8i16 node:$in),
1580 def Neon_low4S : PatFrag<(ops node:$in),
1581 (v2i32 (extract_subvector (v4i32 node:$in),
1583 def Neon_low4f : PatFrag<(ops node:$in),
1584 (v2f32 (extract_subvector (v4f32 node:$in),
1587 class N2VShiftLong<bit q, bit u, bits<5> opcode, string asmop, string DestT,
1588 string SrcT, ValueType DestTy, ValueType SrcTy,
1589 Operand ImmTy, SDPatternOperator ExtOp>
1590 : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
1591 (ins VPR64:$Rn, ImmTy:$Imm),
1592 asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
1593 [(set (DestTy VPR128:$Rd),
1595 (DestTy (ExtOp (SrcTy VPR64:$Rn))),
1596 (DestTy (Neon_vdup (i32 imm:$Imm))))))],
1599 class N2VShiftLongHigh<bit q, bit u, bits<5> opcode, string asmop, string DestT,
1600 string SrcT, ValueType DestTy, ValueType SrcTy,
1601 int StartIndex, Operand ImmTy,
1602 SDPatternOperator ExtOp, PatFrag getTop>
1603 : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
1604 (ins VPR128:$Rn, ImmTy:$Imm),
1605 asmop # "2\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
1606 [(set (DestTy VPR128:$Rd),
1609 (SrcTy (getTop VPR128:$Rn)))),
1610 (DestTy (Neon_vdup (i32 imm:$Imm))))))],
// Widening shift-left by immediate.
//   prefix      - name of the defm that instantiates this multiclass; used
//                 with !cast to refer to the generated instructions from the
//                 zero-shift patterns below.
//   u, opcode   - encoding fields forwarded to the instruction classes.
//   asmop       - mnemonic (the 128-bit-source forms append "2").
//   ExtOp       - extension operator applied to the narrow source lanes.
// _8B/_4H/_2S read a 64-bit source; _16B/_8H/_4S read the high half of a
// 128-bit source via the Neon_High* fragments.
1613 multiclass NeonI_N2VShLL<string prefix, bit u, bits<5> opcode, string asmop,
1615 // 64-bit vector types.
1616 def _8B : N2VShiftLong<0b0, u, opcode, asmop, "8h", "8b", v8i16, v8i8,
1618 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
1621 def _4H : N2VShiftLong<0b0, u, opcode, asmop, "4s", "4h", v4i32, v4i16,
1623 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
1626 def _2S : N2VShiftLong<0b0, u, opcode, asmop, "2d", "2s", v2i64, v2i32,
1628 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
1631 // 128-bit vector types
1632 def _16B : N2VShiftLongHigh<0b1, u, opcode, asmop, "8h", "16b",
1633 v8i16, v8i8, 8, uimm3, ExtOp, Neon_High16B> {
1634 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
1637 def _8H : N2VShiftLongHigh<0b1, u, opcode, asmop, "4s", "8h",
1638 v4i32, v4i16, 4, uimm4, ExtOp, Neon_High8H> {
1639 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
1642 def _4S : N2VShiftLongHigh<0b1, u, opcode, asmop, "2d", "4s",
1643 v2i64, v2i32, 2, uimm5, ExtOp, Neon_High4S> {
1644 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
// A bare extension (no shift node) is selected as the same instruction with
// a shift amount of 0.
1647 // Use other patterns to match when the immediate is 0.
1648 def : Pat<(v8i16 (ExtOp (v8i8 VPR64:$Rn))),
1649 (!cast<Instruction>(prefix # "_8B") VPR64:$Rn, 0)>;
1651 def : Pat<(v4i32 (ExtOp (v4i16 VPR64:$Rn))),
1652 (!cast<Instruction>(prefix # "_4H") VPR64:$Rn, 0)>;
1654 def : Pat<(v2i64 (ExtOp (v2i32 VPR64:$Rn))),
1655 (!cast<Instruction>(prefix # "_2S") VPR64:$Rn, 0)>;
1657 def : Pat<(v8i16 (ExtOp (v8i8 (Neon_High16B VPR128:$Rn)))),
1658 (!cast<Instruction>(prefix # "_16B") VPR128:$Rn, 0)>;
1660 def : Pat<(v4i32 (ExtOp (v4i16 (Neon_High8H VPR128:$Rn)))),
1661 (!cast<Instruction>(prefix # "_8H") VPR128:$Rn, 0)>;
1663 def : Pat<(v2i64 (ExtOp (v2i32 (Neon_High4S VPR128:$Rn)))),
1664 (!cast<Instruction>(prefix # "_4S") VPR128:$Rn, 0)>;
1668 defm SSHLLvvi : NeonI_N2VShLL<"SSHLLvvi", 0b0, 0b10100, "sshll", sext>;
1669 defm USHLLvvi : NeonI_N2VShLL<"USHLLvvi", 0b1, 0b10100, "ushll", zext>;
1671 // Rounding/Saturating shift
1672 class N2VShift_RQ<bit q, bit u, bits<5> opcode, string asmop, string T,
1673 RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1674 SDPatternOperator OpNode>
1675 : NeonI_2VShiftImm<q, u, opcode,
1676 (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
1677 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1678 [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$Rn),
1682 // shift right (vector by immediate)
1683 multiclass NeonI_N2VShR_RQ<bit u, bits<5> opcode, string asmop,
1684 SDPatternOperator OpNode> {
1685 def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1687 let Inst{22-19} = 0b0001;
1690 def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1692 let Inst{22-20} = 0b001;
1695 def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1697 let Inst{22-21} = 0b01;
1700 def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1702 let Inst{22-19} = 0b0001;
1705 def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1707 let Inst{22-20} = 0b001;
1710 def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1712 let Inst{22-21} = 0b01;
1715 def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1721 multiclass NeonI_N2VShL_Q<bit u, bits<5> opcode, string asmop,
1722 SDPatternOperator OpNode> {
1723 // 64-bit vector types.
1724 def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3,
1726 let Inst{22-19} = 0b0001;
1729 def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4,
1731 let Inst{22-20} = 0b001;
1734 def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5,
1736 let Inst{22-21} = 0b01;
1739 // 128-bit vector types.
1740 def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3,
1742 let Inst{22-19} = 0b0001;
1745 def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4,
1747 let Inst{22-20} = 0b001;
1750 def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5,
1752 let Inst{22-21} = 0b01;
1755 def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63,
1761 // Rounding shift right
// Both forms share opcode 0b00100: "srshr" passes u = 0b0 with
// int_aarch64_neon_vsrshr, "urshr" passes u = 0b1 with
// int_aarch64_neon_vurshr.
1762 defm SRSHRvvi : NeonI_N2VShR_RQ<0b0, 0b00100, "srshr",
1763 int_aarch64_neon_vsrshr>;
1764 defm URSHRvvi : NeonI_N2VShR_RQ<0b1, 0b00100, "urshr",
1765 int_aarch64_neon_vurshr>;
1767 // Saturating shift left unsigned
// "sqshlu" is matched through the int_aarch64_neon_vsqshlu intrinsic.
1768 defm SQSHLUvvi : NeonI_N2VShL_Q<0b1, 0b01100, "sqshlu", int_aarch64_neon_vsqshlu>;
1770 // Saturating shift left
// Neon_sqrshlImm / Neon_uqrshlImm are declared near the top of this file as
// the SDNodes for AArch64ISD::NEON_QSHLs / AArch64ISD::NEON_QSHLu, so despite
// the "qrshl" spelling they are the operators used here for "sqshl"/"uqshl".
1771 defm SQSHLvvi : NeonI_N2VShL_Q<0b0, 0b01110, "sqshl", Neon_sqrshlImm>;
1772 defm UQSHLvvi : NeonI_N2VShL_Q<0b1, 0b01110, "uqshl", Neon_uqrshlImm>;
1774 class N2VShiftAdd<bit q, bit u, bits<5> opcode, string asmop, string T,
1775 RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1777 : NeonI_2VShiftImm<q, u, opcode,
1778 (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
1779 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1780 [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
1781 (Ty (OpNode (Ty VPRC:$Rn),
1782 (Ty (Neon_vdup (i32 imm:$Imm))))))))],
1784 let Constraints = "$src = $Rd";
1787 // Shift Right accumulate
1788 multiclass NeonI_N2VShRAdd<bit u, bits<5> opcode, string asmop, SDNode OpNode> {
1789 def _8B : N2VShiftAdd<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1791 let Inst{22-19} = 0b0001;
1794 def _4H : N2VShiftAdd<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1796 let Inst{22-20} = 0b001;
1799 def _2S : N2VShiftAdd<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1801 let Inst{22-21} = 0b01;
1804 def _16B : N2VShiftAdd<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1806 let Inst{22-19} = 0b0001;
1809 def _8H : N2VShiftAdd<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1811 let Inst{22-20} = 0b001;
1814 def _4S : N2VShiftAdd<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1816 let Inst{22-21} = 0b01;
1819 def _2D : N2VShiftAdd<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1825 // Shift right and accumulate
// NeonI_N2VShRAdd takes (u, opcode, asmop, OpNode). "ssra" uses sra with
// u = 0 and "usra" uses srl with u = 1; both share opcode 0b00010. The
// underlying N2VShiftAdd pattern adds $src to OpNode applied to $Rn and a
// Neon_vdup of the immediate.
1826 defm SSRAvvi : NeonI_N2VShRAdd<0, 0b00010, "ssra", sra>;
1827 defm USRAvvi : NeonI_N2VShRAdd<1, 0b00010, "usra", srl>;
1829 // Rounding shift accumulate
1830 class N2VShiftAdd_R<bit q, bit u, bits<5> opcode, string asmop, string T,
1831 RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1832 SDPatternOperator OpNode>
1833 : NeonI_2VShiftImm<q, u, opcode,
1834 (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
1835 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1836 [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
1837 (Ty (OpNode (Ty VPRC:$Rn), (i32 imm:$Imm))))))],
1839 let Constraints = "$src = $Rd";
1842 multiclass NeonI_N2VShRAdd_R<bit u, bits<5> opcode, string asmop,
1843 SDPatternOperator OpNode> {
1844 def _8B : N2VShiftAdd_R<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1846 let Inst{22-19} = 0b0001;
1849 def _4H : N2VShiftAdd_R<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1851 let Inst{22-20} = 0b001;
1854 def _2S : N2VShiftAdd_R<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1856 let Inst{22-21} = 0b01;
1859 def _16B : N2VShiftAdd_R<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1861 let Inst{22-19} = 0b0001;
1864 def _8H : N2VShiftAdd_R<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1866 let Inst{22-20} = 0b001;
1869 def _4S : N2VShiftAdd_R<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1871 let Inst{22-21} = 0b01;
1874 def _2D : N2VShiftAdd_R<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1880 // Rounding shift right and accumulate
// Both share opcode 0b00110 and reuse the rounding-shift-right intrinsics
// (int_aarch64_neon_vsrshr / int_aarch64_neon_vurshr) as OpNode; the
// N2VShiftAdd_R pattern adds $src to OpNode($Rn, imm).
1881 defm SRSRAvvi : NeonI_N2VShRAdd_R<0, 0b00110, "srsra", int_aarch64_neon_vsrshr>;
1882 defm URSRAvvi : NeonI_N2VShRAdd_R<1, 0b00110, "ursra", int_aarch64_neon_vurshr>;
1884 // Shift insert by immediate
1885 class N2VShiftIns<bit q, bit u, bits<5> opcode, string asmop, string T,
1886 RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1887 SDPatternOperator OpNode>
1888 : NeonI_2VShiftImm<q, u, opcode,
1889 (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
1890 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1891 [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$src), (Ty VPRC:$Rn),
1894 let Constraints = "$src = $Rd";
1897 // shift left insert (vector by immediate)
1898 multiclass NeonI_N2VShLIns<bit u, bits<5> opcode, string asmop> {
1899 def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3,
1900 int_aarch64_neon_vsli> {
1901 let Inst{22-19} = 0b0001;
1904 def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4,
1905 int_aarch64_neon_vsli> {
1906 let Inst{22-20} = 0b001;
1909 def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5,
1910 int_aarch64_neon_vsli> {
1911 let Inst{22-21} = 0b01;
1914 // 128-bit vector types
1915 def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3,
1916 int_aarch64_neon_vsli> {
1917 let Inst{22-19} = 0b0001;
1920 def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4,
1921 int_aarch64_neon_vsli> {
1922 let Inst{22-20} = 0b001;
1925 def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5,
1926 int_aarch64_neon_vsli> {
1927 let Inst{22-21} = 0b01;
1930 def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63,
1931 int_aarch64_neon_vsli> {
1936 // shift right insert (vector by immediate)
1937 multiclass NeonI_N2VShRIns<bit u, bits<5> opcode, string asmop> {
1938 // 64-bit vector types.
1939 def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1940 int_aarch64_neon_vsri> {
1941 let Inst{22-19} = 0b0001;
1944 def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1945 int_aarch64_neon_vsri> {
1946 let Inst{22-20} = 0b001;
1949 def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1950 int_aarch64_neon_vsri> {
1951 let Inst{22-21} = 0b01;
1954 // 128-bit vector types
1955 def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1956 int_aarch64_neon_vsri> {
1957 let Inst{22-19} = 0b0001;
1960 def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1961 int_aarch64_neon_vsri> {
1962 let Inst{22-20} = 0b001;
1965 def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1966 int_aarch64_neon_vsri> {
1967 let Inst{22-21} = 0b01;
1970 def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1971 int_aarch64_neon_vsri> {
1976 // Shift left and insert
// NeonI_N2VShLIns hard-codes int_aarch64_neon_vsli as the pattern operator,
// so only (u, opcode, asmop) are supplied here.
1977 defm SLIvvi : NeonI_N2VShLIns<0b1, 0b01010, "sli">;
1979 // Shift right and insert
// NeonI_N2VShRIns hard-codes int_aarch64_neon_vsri as the pattern operator.
1980 defm SRIvvi : NeonI_N2VShRIns<0b1, 0b01000, "sri">;
1982 class N2VShR_Narrow<bit q, bit u, bits<5> opcode, string asmop, string DestT,
1983 string SrcT, Operand ImmTy>
1984 : NeonI_2VShiftImm<q, u, opcode,
1985 (outs VPR64:$Rd), (ins VPR128:$Rn, ImmTy:$Imm),
1986 asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
1989 class N2VShR_Narrow_Hi<bit q, bit u, bits<5> opcode, string asmop, string DestT,
1990 string SrcT, Operand ImmTy>
1991 : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
1992 (ins VPR128:$src, VPR128:$Rn, ImmTy:$Imm),
1993 asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
1995 let Constraints = "$src = $Rd";
1998 // shift right narrow (vector by immediate)
1999 multiclass NeonI_N2VShR_Narrow<bit u, bits<5> opcode, string asmop> {
2000 def _8B : N2VShR_Narrow<0b0, u, opcode, asmop, "8b", "8h", shr_imm8> {
2001 let Inst{22-19} = 0b0001;
2004 def _4H : N2VShR_Narrow<0b0, u, opcode, asmop, "4h", "4s", shr_imm16> {
2005 let Inst{22-20} = 0b001;
2008 def _2S : N2VShR_Narrow<0b0, u, opcode, asmop, "2s", "2d", shr_imm32> {
2009 let Inst{22-21} = 0b01;
2012 // Shift Narrow High
2013 def _16B : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "16b", "8h",
2015 let Inst{22-19} = 0b0001;
2018 def _8H : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "8h", "4s",
2020 let Inst{22-20} = 0b001;
2023 def _4S : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "4s", "2d",
2025 let Inst{22-21} = 0b01;
2029 // Shift right narrow
// Each defm below takes (u, opcode, asmop) and expands NeonI_N2VShR_Narrow
// into _8B/_4H/_2S records plus _16B/_8H/_4S records whose mnemonic is
// asmop # "2".
2030 defm SHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10000, "shrn">;
2032 // Shift right narrow (prefix Q is saturating, prefix R is rounding)
// Note: the QSHRUNvvi / QRSHRUNvvi record names omit the leading "S" of their
// "sqshrun" / "sqrshrun" mnemonics; the Neon_shiftNarrow_QR_patterns
// instantiations in this file refer to them by exactly these names.
2033 defm QSHRUNvvi :NeonI_N2VShR_Narrow<0b1, 0b10000, "sqshrun">;
2034 defm RSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10001, "rshrn">;
2035 defm QRSHRUNvvi : NeonI_N2VShR_Narrow<0b1, 0b10001, "sqrshrun">;
2036 defm SQSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10010, "sqshrn">;
2037 defm UQSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10010, "uqshrn">;
2038 defm SQRSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10011, "sqrshrn">;
2039 defm UQRSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10011, "uqrshrn">;
// Pattern fragments that match concat_vectors of two half-width vectors
// ($Rm first, $Rn second). The suffix names the result type:
//   _2D -> v2i64 from v1i64      _8H -> v8i16 from v4i16
//   _4S -> v4i32 from v2i32      _4f -> v4f32 from v2f32
//   _2d -> v2f64 from v1f64
2041 def Neon_combine_2D : PatFrag<(ops node:$Rm, node:$Rn),
2042 (v2i64 (concat_vectors (v1i64 node:$Rm),
2043 (v1i64 node:$Rn)))>;
2044 def Neon_combine_8H : PatFrag<(ops node:$Rm, node:$Rn),
2045 (v8i16 (concat_vectors (v4i16 node:$Rm),
2046 (v4i16 node:$Rn)))>;
2047 def Neon_combine_4S : PatFrag<(ops node:$Rm, node:$Rn),
2048 (v4i32 (concat_vectors (v2i32 node:$Rm),
2049 (v2i32 node:$Rn)))>;
2050 def Neon_combine_4f : PatFrag<(ops node:$Rm, node:$Rn),
2051 (v4f32 (concat_vectors (v2f32 node:$Rm),
2052 (v2f32 node:$Rn)))>;
2053 def Neon_combine_2d : PatFrag<(ops node:$Rm, node:$Rn),
2054 (v2f64 (concat_vectors (v1f64 node:$Rm),
2055 (v1f64 node:$Rn)))>;
// Pattern fragments for a vector shift right by a splatted amount: $lhs is
// shifted by Neon_vdup of the i32 $rhs. The "lshr" fragments use srl and the
// "ashr" fragments use sra; the suffix (8H/4S/2D) selects v8i16/v4i32/v2i64.
// Neon_shiftNarrow_patterns looks these up by name via
// !cast<PatFrag>("Neon_" # shr # "Imm8H") and friends.
2057 def Neon_lshrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
2058 (v8i16 (srl (v8i16 node:$lhs),
2059 (v8i16 (Neon_vdup (i32 node:$rhs)))))>;
2060 def Neon_lshrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
2061 (v4i32 (srl (v4i32 node:$lhs),
2062 (v4i32 (Neon_vdup (i32 node:$rhs)))))>;
2063 def Neon_lshrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
2064 (v2i64 (srl (v2i64 node:$lhs),
2065 (v2i64 (Neon_vdup (i32 node:$rhs)))))>;
2066 def Neon_ashrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
2067 (v8i16 (sra (v8i16 node:$lhs),
2068 (v8i16 (Neon_vdup (i32 node:$rhs)))))>;
2069 def Neon_ashrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
2070 (v4i32 (sra (v4i32 node:$lhs),
2071 (v4i32 (Neon_vdup (i32 node:$rhs)))))>;
2072 def Neon_ashrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
2073 (v2i64 (sra (v2i64 node:$lhs),
2074 (v2i64 (Neon_vdup (i32 node:$rhs)))))>;
2076 // Normal shift right narrow is matched by IR (srl/sra, trunc, concat_vectors)
2077 multiclass Neon_shiftNarrow_patterns<string shr> {
2078 def : Pat<(v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H") VPR128:$Rn,
2080 (SHRNvvi_8B VPR128:$Rn, imm:$Imm)>;
2081 def : Pat<(v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S") VPR128:$Rn,
2083 (SHRNvvi_4H VPR128:$Rn, imm:$Imm)>;
2084 def : Pat<(v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D") VPR128:$Rn,
2086 (SHRNvvi_2S VPR128:$Rn, imm:$Imm)>;
2088 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
2089 (v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H")
2090 VPR128:$Rn, (i32 imm:$Imm))))))),
2091 (SHRNvvi_16B (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
2092 VPR128:$Rn, imm:$Imm)>;
2093 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
2094 (v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S")
2095 VPR128:$Rn, (i32 imm:$Imm))))))),
2096 (SHRNvvi_8H (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2097 VPR128:$Rn, imm:$Imm)>;
2098 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
2099 (v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D")
2100 VPR128:$Rn, (i32 imm:$Imm))))))),
2101 (SHRNvvi_4S (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2102 VPR128:$Rn, imm:$Imm)>;
2105 multiclass Neon_shiftNarrow_QR_patterns<SDPatternOperator op, string prefix> {
2106 def : Pat<(v8i8 (op (v8i16 VPR128:$Rn), imm:$Imm)),
2107 (!cast<Instruction>(prefix # "_8B") VPR128:$Rn, imm:$Imm)>;
2108 def : Pat<(v4i16 (op (v4i32 VPR128:$Rn), imm:$Imm)),
2109 (!cast<Instruction>(prefix # "_4H") VPR128:$Rn, imm:$Imm)>;
2110 def : Pat<(v2i32 (op (v2i64 VPR128:$Rn), imm:$Imm)),
2111 (!cast<Instruction>(prefix # "_2S") VPR128:$Rn, imm:$Imm)>;
2113 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
2114 (v1i64 (bitconvert (v8i8 (op (v8i16 VPR128:$Rn), imm:$Imm))))),
2115 (!cast<Instruction>(prefix # "_16B")
2116 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2117 VPR128:$Rn, imm:$Imm)>;
2118 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
2119 (v1i64 (bitconvert (v4i16 (op (v4i32 VPR128:$Rn), imm:$Imm))))),
2120 (!cast<Instruction>(prefix # "_8H")
2121 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2122 VPR128:$Rn, imm:$Imm)>;
2123 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
2124 (v1i64 (bitconvert (v2i32 (op (v2i64 VPR128:$Rn), imm:$Imm))))),
2125 (!cast<Instruction>(prefix # "_4S")
2126 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2127 VPR128:$Rn, imm:$Imm)>;
// Plain SHRN is selected from both logical and arithmetic shift-right
// fragments; the string picks Neon_lshrImm* or Neon_ashrImm*.
2130 defm : Neon_shiftNarrow_patterns<"lshr">;
2131 defm : Neon_shiftNarrow_patterns<"ashr">;
// Saturating / rounding narrowing shifts are selected from intrinsics. The
// second argument is the instruction-name prefix to which the multiclass
// appends "_8B", "_4H", "_2S", "_16B", "_8H" and "_4S".
2133 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrun, "QSHRUNvvi">;
2134 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vrshrn, "RSHRNvvi">;
2135 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrun, "QRSHRUNvvi">;
2136 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrn, "SQSHRNvvi">;
2137 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqshrn, "UQSHRNvvi">;
2138 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrn, "SQRSHRNvvi">;
2139 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqrshrn, "UQRSHRNvvi">;
2141 // Convert between fixed-point and floating-point
2142 class N2VCvt_Fx<bit q, bit u, bits<5> opcode, string asmop, string T,
2143 RegisterOperand VPRC, ValueType DestTy, ValueType SrcTy,
2144 Operand ImmTy, SDPatternOperator IntOp>
2145 : NeonI_2VShiftImm<q, u, opcode,
2146 (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
2147 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
2148 [(set (DestTy VPRC:$Rd), (DestTy (IntOp (SrcTy VPRC:$Rn),
2152 multiclass NeonI_N2VCvt_Fx2fp<bit u, bits<5> opcode, string asmop,
2153 SDPatternOperator IntOp> {
2154 def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2f32, v2i32,
2156 let Inst{22-21} = 0b01;
2159 def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4f32, v4i32,
2161 let Inst{22-21} = 0b01;
2164 def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2f64, v2i64,
2170 multiclass NeonI_N2VCvt_Fp2fx<bit u, bits<5> opcode, string asmop,
2171 SDPatternOperator IntOp> {
2172 def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2i32, v2f32,
2174 let Inst{22-21} = 0b01;
2177 def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4i32, v4f32,
2179 let Inst{22-21} = 0b01;
2182 def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2i64, v2f64,
2188 // Convert fixed-point to floating-point
// NeonI_N2VCvt_Fx2fp produces _2S/_4S/_2D records with a floating-point
// destination type and an integer source type. Signed ("scvtf") and unsigned
// ("ucvtf") share opcode 0b11100 and differ in u and in the int_arm_neon_*
// intrinsic.
2189 defm VCVTxs2f : NeonI_N2VCvt_Fx2fp<0, 0b11100, "scvtf",
2190 int_arm_neon_vcvtfxs2fp>;
2191 defm VCVTxu2f : NeonI_N2VCvt_Fx2fp<1, 0b11100, "ucvtf",
2192 int_arm_neon_vcvtfxu2fp>;
2194 // Convert floating-point to fixed-point
// NeonI_N2VCvt_Fp2fx is the mirror image: integer destination type,
// floating-point source type, opcode 0b11111.
2195 defm VCVTf2xs : NeonI_N2VCvt_Fp2fx<0, 0b11111, "fcvtzs",
2196 int_arm_neon_vcvtfp2fxs>;
2197 defm VCVTf2xu : NeonI_N2VCvt_Fp2fx<1, 0b11111, "fcvtzu",
2198 int_arm_neon_vcvtfp2fxu>;
2200 multiclass Neon_sshll2_0<SDNode ext>
2202 def _v8i8 : PatFrag<(ops node:$Rn),
2203 (v8i16 (ext (v8i8 (Neon_High16B node:$Rn))))>;
2204 def _v4i16 : PatFrag<(ops node:$Rn),
2205 (v4i32 (ext (v4i16 (Neon_High8H node:$Rn))))>;
2206 def _v2i32 : PatFrag<(ops node:$Rn),
2207 (v2i64 (ext (v2i32 (Neon_High4S node:$Rn))))>;
// Create the NI_sext_high_{v8i8,v4i16,v2i32} and NI_zext_high_{...} pattern
// fragments: each applies the given extension to the Neon_High16B /
// Neon_High8H / Neon_High4S part of $Rn. The long2/wide2 multiclasses in
// this file use them as their "ext" operator.
2210 defm NI_sext_high : Neon_sshll2_0<sext>;
2211 defm NI_zext_high : Neon_sshll2_0<zext>;
2214 //===----------------------------------------------------------------------===//
2215 // Multiclasses for NeonI_Across
2216 //===----------------------------------------------------------------------===//
2220 multiclass NeonI_2VAcross_1<bit u, bits<5> opcode,
2221 string asmop, SDPatternOperator opnode>
2223 def _1h8b: NeonI_2VAcross<0b0, u, 0b00, opcode,
2224 (outs FPR16:$Rd), (ins VPR64:$Rn),
2225 asmop # "\t$Rd, $Rn.8b",
2226 [(set (v1i16 FPR16:$Rd),
2227 (v1i16 (opnode (v8i8 VPR64:$Rn))))],
2230 def _1h16b: NeonI_2VAcross<0b1, u, 0b00, opcode,
2231 (outs FPR16:$Rd), (ins VPR128:$Rn),
2232 asmop # "\t$Rd, $Rn.16b",
2233 [(set (v1i16 FPR16:$Rd),
2234 (v1i16 (opnode (v16i8 VPR128:$Rn))))],
2237 def _1s4h: NeonI_2VAcross<0b0, u, 0b01, opcode,
2238 (outs FPR32:$Rd), (ins VPR64:$Rn),
2239 asmop # "\t$Rd, $Rn.4h",
2240 [(set (v1i32 FPR32:$Rd),
2241 (v1i32 (opnode (v4i16 VPR64:$Rn))))],
2244 def _1s8h: NeonI_2VAcross<0b1, u, 0b01, opcode,
2245 (outs FPR32:$Rd), (ins VPR128:$Rn),
2246 asmop # "\t$Rd, $Rn.8h",
2247 [(set (v1i32 FPR32:$Rd),
2248 (v1i32 (opnode (v8i16 VPR128:$Rn))))],
2251 // _1d2s doesn't exist!
2253 def _1d4s: NeonI_2VAcross<0b1, u, 0b10, opcode,
2254 (outs FPR64:$Rd), (ins VPR128:$Rn),
2255 asmop # "\t$Rd, $Rn.4s",
2256 [(set (v1i64 FPR64:$Rd),
2257 (v1i64 (opnode (v4i32 VPR128:$Rn))))],
// NeonI_2VAcross_1 defines _1h8b, _1h16b, _1s4h, _1s8h and _1d4s records,
// i.e. the scalar result register (FPR16/FPR32/FPR64) is wider than the
// source vector's elements. Signed and unsigned forms differ in u and in the
// intrinsic.
2261 defm SADDLV : NeonI_2VAcross_1<0b0, 0b00011, "saddlv", int_aarch64_neon_saddlv>;
2262 defm UADDLV : NeonI_2VAcross_1<0b1, 0b00011, "uaddlv", int_aarch64_neon_uaddlv>;
2266 multiclass NeonI_2VAcross_2<bit u, bits<5> opcode,
2267 string asmop, SDPatternOperator opnode>
2269 def _1b8b: NeonI_2VAcross<0b0, u, 0b00, opcode,
2270 (outs FPR8:$Rd), (ins VPR64:$Rn),
2271 asmop # "\t$Rd, $Rn.8b",
2272 [(set (v1i8 FPR8:$Rd),
2273 (v1i8 (opnode (v8i8 VPR64:$Rn))))],
2276 def _1b16b: NeonI_2VAcross<0b1, u, 0b00, opcode,
2277 (outs FPR8:$Rd), (ins VPR128:$Rn),
2278 asmop # "\t$Rd, $Rn.16b",
2279 [(set (v1i8 FPR8:$Rd),
2280 (v1i8 (opnode (v16i8 VPR128:$Rn))))],
2283 def _1h4h: NeonI_2VAcross<0b0, u, 0b01, opcode,
2284 (outs FPR16:$Rd), (ins VPR64:$Rn),
2285 asmop # "\t$Rd, $Rn.4h",
2286 [(set (v1i16 FPR16:$Rd),
2287 (v1i16 (opnode (v4i16 VPR64:$Rn))))],
2290 def _1h8h: NeonI_2VAcross<0b1, u, 0b01, opcode,
2291 (outs FPR16:$Rd), (ins VPR128:$Rn),
2292 asmop # "\t$Rd, $Rn.8h",
2293 [(set (v1i16 FPR16:$Rd),
2294 (v1i16 (opnode (v8i16 VPR128:$Rn))))],
2297 // _1s2s doesn't exist!
2299 def _1s4s: NeonI_2VAcross<0b1, u, 0b10, opcode,
2300 (outs FPR32:$Rd), (ins VPR128:$Rn),
2301 asmop # "\t$Rd, $Rn.4s",
2302 [(set (v1i32 FPR32:$Rd),
2303 (v1i32 (opnode (v4i32 VPR128:$Rn))))],
// NeonI_2VAcross_2 defines _1b8b, _1b16b, _1h4h, _1h8h and _1s4s records:
// the scalar result has the same element width as the source vector.
// max uses opcode 0b01010, min uses 0b11010, and addv uses 0b11011 (only a
// u = 0b0 form is instantiated for addv).
2307 defm SMAXV : NeonI_2VAcross_2<0b0, 0b01010, "smaxv", int_aarch64_neon_smaxv>;
2308 defm UMAXV : NeonI_2VAcross_2<0b1, 0b01010, "umaxv", int_aarch64_neon_umaxv>;
2310 defm SMINV : NeonI_2VAcross_2<0b0, 0b11010, "sminv", int_aarch64_neon_sminv>;
2311 defm UMINV : NeonI_2VAcross_2<0b1, 0b11010, "uminv", int_aarch64_neon_uminv>;
2313 defm ADDV : NeonI_2VAcross_2<0b0, 0b11011, "addv", int_aarch64_neon_vaddv>;
2317 multiclass NeonI_2VAcross_3<bit u, bits<5> opcode, bits<2> size,
2318 string asmop, SDPatternOperator opnode>
2320 def _1s4s: NeonI_2VAcross<0b1, u, size, opcode,
2321 (outs FPR32:$Rd), (ins VPR128:$Rn),
2322 asmop # "\t$Rd, $Rn.4s",
2323 [(set (v1f32 FPR32:$Rd),
2324 (v1f32 (opnode (v4f32 VPR128:$Rn))))],
// NeonI_2VAcross_3 takes (u, opcode, size, asmop, opnode) and defines a
// single _1s4s record over v4f32. Within each pair the max and min forms
// share the opcode and are told apart by size (0b00 for max, 0b10 for min).
2328 defm FMAXNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b00, "fmaxnmv",
2329 int_aarch64_neon_vmaxnmv>;
2330 defm FMINNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b10, "fminnmv",
2331 int_aarch64_neon_vminnmv>;
2333 defm FMAXV : NeonI_2VAcross_3<0b1, 0b01111, 0b00, "fmaxv",
2334 int_aarch64_neon_vmaxv>;
2335 defm FMINV : NeonI_2VAcross_3<0b1, 0b01111, 0b10, "fminv",
2336 int_aarch64_neon_vminv>;
2338 // The following are for the instruction class (3V Diff)
2340 // normal long/long2 pattern
2341 class NeonI_3VDL<bit q, bit u, bits<2> size, bits<4> opcode,
2342 string asmop, string ResS, string OpS,
2343 SDPatternOperator opnode, SDPatternOperator ext,
2344 RegisterOperand OpVPR,
2345 ValueType ResTy, ValueType OpTy>
2346 : NeonI_3VDiff<q, u, size, opcode,
2347 (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2348 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2349 [(set (ResTy VPR128:$Rd),
2350 (ResTy (opnode (ResTy (ext (OpTy OpVPR:$Rn))),
2351 (ResTy (ext (OpTy OpVPR:$Rm))))))],
2354 multiclass NeonI_3VDL_s<bit u, bits<4> opcode,
2355 string asmop, SDPatternOperator opnode,
2358 let isCommutable = Commutable in {
2359 def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2360 opnode, sext, VPR64, v8i16, v8i8>;
2361 def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2362 opnode, sext, VPR64, v4i32, v4i16>;
2363 def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2364 opnode, sext, VPR64, v2i64, v2i32>;
2368 multiclass NeonI_3VDL2_s<bit u, bits<4> opcode,
2369 string asmop, SDPatternOperator opnode,
2372 let isCommutable = Commutable in {
2373 def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2374 opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>;
2375 def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2376 opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>;
2377 def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2378 opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>;
2382 multiclass NeonI_3VDL_u<bit u, bits<4> opcode,
2383 string asmop, SDPatternOperator opnode,
2386 let isCommutable = Commutable in {
2387 def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2388 opnode, zext, VPR64, v8i16, v8i8>;
2389 def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2390 opnode, zext, VPR64, v4i32, v4i16>;
2391 def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2392 opnode, zext, VPR64, v2i64, v2i32>;
2396 multiclass NeonI_3VDL2_u<bit u, bits<4> opcode,
2397 string asmop, SDPatternOperator opnode,
2400 let isCommutable = Commutable in {
2401 def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2402 opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>;
2403 def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2404 opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>;
2405 def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2406 opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>;
// Long add/subtract: both operands are extended (sext for the _s
// multiclasses, zext for the _u ones) before opnode is applied. The "2"
// variants use the NI_sext_high_* / NI_zext_high_* fragments on VPR128
// operands. The final argument feeds `let isCommutable = Commutable`:
// 1 for add, 0 for sub.
2410 defm SADDLvvv : NeonI_3VDL_s<0b0, 0b0000, "saddl", add, 1>;
2411 defm UADDLvvv : NeonI_3VDL_u<0b1, 0b0000, "uaddl", add, 1>;
2413 defm SADDL2vvv : NeonI_3VDL2_s<0b0, 0b0000, "saddl2", add, 1>;
2414 defm UADDL2vvv : NeonI_3VDL2_u<0b1, 0b0000, "uaddl2", add, 1>;
2416 defm SSUBLvvv : NeonI_3VDL_s<0b0, 0b0010, "ssubl", sub, 0>;
2417 defm USUBLvvv : NeonI_3VDL_u<0b1, 0b0010, "usubl", sub, 0>;
2419 defm SSUBL2vvv : NeonI_3VDL2_s<0b0, 0b0010, "ssubl2", sub, 0>;
2420 defm USUBL2vvv : NeonI_3VDL2_u<0b1, 0b0010, "usubl2", sub, 0>;
2422 // normal wide/wide2 pattern
2423 class NeonI_3VDW<bit q, bit u, bits<2> size, bits<4> opcode,
2424 string asmop, string ResS, string OpS,
2425 SDPatternOperator opnode, SDPatternOperator ext,
2426 RegisterOperand OpVPR,
2427 ValueType ResTy, ValueType OpTy>
2428 : NeonI_3VDiff<q, u, size, opcode,
2429 (outs VPR128:$Rd), (ins VPR128:$Rn, OpVPR:$Rm),
2430 asmop # "\t$Rd." # ResS # ", $Rn." # ResS # ", $Rm." # OpS,
2431 [(set (ResTy VPR128:$Rd),
2432 (ResTy (opnode (ResTy VPR128:$Rn),
2433 (ResTy (ext (OpTy OpVPR:$Rm))))))],
2436 multiclass NeonI_3VDW_s<bit u, bits<4> opcode,
2437 string asmop, SDPatternOperator opnode>
2439 def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2440 opnode, sext, VPR64, v8i16, v8i8>;
2441 def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2442 opnode, sext, VPR64, v4i32, v4i16>;
2443 def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2444 opnode, sext, VPR64, v2i64, v2i32>;
// Signed wide add/subtract: per NeonI_3VDW, $Rn is already the wide result
// type and only $Rm is sign-extended before opnode is applied.
2447 defm SADDWvvv : NeonI_3VDW_s<0b0, 0b0001, "saddw", add>;
2448 defm SSUBWvvv : NeonI_3VDW_s<0b0, 0b0011, "ssubw", sub>;
2450 multiclass NeonI_3VDW2_s<bit u, bits<4> opcode,
2451 string asmop, SDPatternOperator opnode>
2453 def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2454 opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>;
2455 def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2456 opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>;
2457 def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2458 opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>;
// Signed wide add/subtract, "2" forms: $Rm is a VPR128 operand extended
// through the NI_sext_high_* fragments.
2461 defm SADDW2vvv : NeonI_3VDW2_s<0b0, 0b0001, "saddw2", add>;
2462 defm SSUBW2vvv : NeonI_3VDW2_s<0b0, 0b0011, "ssubw2", sub>;
2464 multiclass NeonI_3VDW_u<bit u, bits<4> opcode,
2465 string asmop, SDPatternOperator opnode>
2467 def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2468 opnode, zext, VPR64, v8i16, v8i8>;
2469 def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2470 opnode, zext, VPR64, v4i32, v4i16>;
2471 def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2472 opnode, zext, VPR64, v2i64, v2i32>;
// Unsigned wide add/subtract: only $Rm is zero-extended before opnode is
// applied.
2475 defm UADDWvvv : NeonI_3VDW_u<0b1, 0b0001, "uaddw", add>;
2476 defm USUBWvvv : NeonI_3VDW_u<0b1, 0b0011, "usubw", sub>;
2478 multiclass NeonI_3VDW2_u<bit u, bits<4> opcode,
2479 string asmop, SDPatternOperator opnode>
2481 def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2482 opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>;
2483 def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2484 opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>;
2485 def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2486 opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>;
// Unsigned wide add/subtract, "2" forms: $Rm is a VPR128 operand extended
// through the NI_zext_high_* fragments.
2489 defm UADDW2vvv : NeonI_3VDW2_u<0b1, 0b0001, "uaddw2", add>;
2490 defm USUBW2vvv : NeonI_3VDW2_u<0b1, 0b0011, "usubw2", sub>;
2492 // Get the high half of each vector element.
2493 multiclass NeonI_get_high
2495 def _8h : PatFrag<(ops node:$Rn),
2496 (v8i8 (trunc (v8i16 (srl (v8i16 node:$Rn),
2497 (v8i16 (Neon_vdup (i32 8)))))))>;
2498 def _4s : PatFrag<(ops node:$Rn),
2499 (v4i16 (trunc (v4i32 (srl (v4i32 node:$Rn),
2500 (v4i32 (Neon_vdup (i32 16)))))))>;
2501 def _2d : PatFrag<(ops node:$Rn),
2502 (v2i32 (trunc (v2i64 (srl (v2i64 node:$Rn),
2503 (v2i64 (Neon_vdup (i32 32)))))))>;
// Creates NI_get_hi_8h, NI_get_hi_4s and NI_get_hi_2d: each shifts every
// element right (srl) by 8, 16 or 32 bits respectively and truncates to the
// half-width element type.
2506 defm NI_get_hi : NeonI_get_high;
2508 // pattern for addhn/subhn with 2 operands
2509 class NeonI_3VDN_addhn_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
2510 string asmop, string ResS, string OpS,
2511 SDPatternOperator opnode, SDPatternOperator get_hi,
2512 ValueType ResTy, ValueType OpTy>
2513 : NeonI_3VDiff<q, u, size, opcode,
2514 (outs VPR64:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
2515 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2516 [(set (ResTy VPR64:$Rd),
2518 (OpTy (opnode (OpTy VPR128:$Rn),
2519 (OpTy VPR128:$Rm))))))],
2522 multiclass NeonI_3VDN_addhn_2Op<bit u, bits<4> opcode,
2523 string asmop, SDPatternOperator opnode,
2526 let isCommutable = Commutable in {
2527 def _8b8h : NeonI_3VDN_addhn_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h",
2528 opnode, NI_get_hi_8h, v8i8, v8i16>;
2529 def _4h4s : NeonI_3VDN_addhn_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s",
2530 opnode, NI_get_hi_4s, v4i16, v4i32>;
2531 def _2s2d : NeonI_3VDN_addhn_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d",
2532 opnode, NI_get_hi_2d, v2i32, v2i64>;
// addhn/subhn are selected as NI_get_hi_* applied to a plain add/sub of the
// two VPR128 operands. The final argument feeds isCommutable: 1 for add,
// 0 for sub.
2536 defm ADDHNvvv : NeonI_3VDN_addhn_2Op<0b0, 0b0100, "addhn", add, 1>;
2537 defm SUBHNvvv : NeonI_3VDN_addhn_2Op<0b0, 0b0110, "subhn", sub, 0>;
2539 // pattern for operation with 2 operands
2540 class NeonI_3VD_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
2541 string asmop, string ResS, string OpS,
2542 SDPatternOperator opnode,
2543 RegisterOperand ResVPR, RegisterOperand OpVPR,
2544 ValueType ResTy, ValueType OpTy>
2545 : NeonI_3VDiff<q, u, size, opcode,
2546 (outs ResVPR:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2547 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2548 [(set (ResTy ResVPR:$Rd),
2549 (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))],
2552 // normal narrow pattern
2553 multiclass NeonI_3VDN_2Op<bit u, bits<4> opcode,
2554 string asmop, SDPatternOperator opnode,
2557 let isCommutable = Commutable in {
2558 def _8b8h : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h",
2559 opnode, VPR64, VPR128, v8i8, v8i16>;
2560 def _4h4s : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s",
2561 opnode, VPR64, VPR128, v4i16, v4i32>;
2562 def _2s2d : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d",
2563 opnode, VPR64, VPR128, v2i32, v2i64>;
// The rounding forms are matched directly from the int_arm_neon_vraddhn /
// int_arm_neon_vrsubhn intrinsics (VPR128 operands, VPR64 result). They use
// the same opcodes as addhn/subhn with u = 0b1.
2567 defm RADDHNvvv : NeonI_3VDN_2Op<0b1, 0b0100, "raddhn", int_arm_neon_vraddhn, 1>;
2568 defm RSUBHNvvv : NeonI_3VDN_2Op<0b1, 0b0110, "rsubhn", int_arm_neon_vrsubhn, 0>;
2570 // pattern for acle intrinsic with 3 operands
2571 class NeonI_3VDN_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
2572 string asmop, string ResS, string OpS>
2573 : NeonI_3VDiff<q, u, size, opcode,
2574 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn, VPR128:$Rm),
2575 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2577 let Constraints = "$src = $Rd";
2578 let neverHasSideEffects = 1;
2581 multiclass NeonI_3VDN_3Op_v1<bit u, bits<4> opcode,
2583 def _16b8h : NeonI_3VDN_3Op<0b1, u, 0b00, opcode, asmop, "16b", "8h">;
2584 def _8h4s : NeonI_3VDN_3Op<0b1, u, 0b01, opcode, asmop, "8h", "4s">;
2585 def _4s2d : NeonI_3VDN_3Op<0b1, u, 0b10, opcode, asmop, "4s", "2d">;
// "2" forms of the narrowing high-half add/sub. Each expands to _16b8h,
// _8h4s and _4s2d records that tie $src to $Rd. Selection for them is
// provided by the NarrowHighHalfPat definitions in this file.
2588 defm ADDHN2vvv : NeonI_3VDN_3Op_v1<0b0, 0b0100, "addhn2">;
2589 defm SUBHN2vvv : NeonI_3VDN_3Op_v1<0b0, 0b0110, "subhn2">;
2591 defm RADDHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0100, "raddhn2">;
2592 defm RSUBHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0110, "rsubhn2">;
2594 // Patterns have to be separate because there's a SUBREG_TO_REG in the output
// Selection pattern for the "write the high half" narrowing instructions.
//   INST   - instruction to select.
//   DstTy  - narrow (64-bit) vector type produced by coreop.
//   SrcTy  - wide (128-bit) vector type of the two inputs.
//   coreop - operator computing the narrow result from $Rn and $Rm.
// Matches Neon_combine_2D of an existing 64-bit $src with the bitconverted
// coreop result, and emits INST with $src placed in sub_64 of a 128-bit
// register via SUBREG_TO_REG, followed by $Rn and $Rm.
2596 class NarrowHighHalfPat<Instruction INST, ValueType DstTy, ValueType SrcTy,
2597 SDPatternOperator coreop>
2598 : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
2599 (v1i64 (bitconvert (DstTy (coreop (SrcTy VPR128:$Rn),
2600 (SrcTy VPR128:$Rm)))))),
2601 (INST (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2602 VPR128:$Rn, VPR128:$Rm)>;
// addhn2: coreop is NI_get_hi_* applied to an add, wrapped in a BinOpFrag.
2605 def : NarrowHighHalfPat<ADDHN2vvv_16b8h, v8i8, v8i16,
2606 BinOpFrag<(NI_get_hi_8h (add node:$LHS, node:$RHS))>>;
2607 def : NarrowHighHalfPat<ADDHN2vvv_8h4s, v4i16, v4i32,
2608 BinOpFrag<(NI_get_hi_4s (add node:$LHS, node:$RHS))>>;
2609 def : NarrowHighHalfPat<ADDHN2vvv_4s2d, v2i32, v2i64,
2610 BinOpFrag<(NI_get_hi_2d (add node:$LHS, node:$RHS))>>;
// subhn2: same shape with sub in place of add.
2613 def : NarrowHighHalfPat<SUBHN2vvv_16b8h, v8i8, v8i16,
2614 BinOpFrag<(NI_get_hi_8h (sub node:$LHS, node:$RHS))>>;
2615 def : NarrowHighHalfPat<SUBHN2vvv_8h4s, v4i16, v4i32,
2616 BinOpFrag<(NI_get_hi_4s (sub node:$LHS, node:$RHS))>>;
2617 def : NarrowHighHalfPat<SUBHN2vvv_4s2d, v2i32, v2i64,
2618 BinOpFrag<(NI_get_hi_2d (sub node:$LHS, node:$RHS))>>;
// raddhn2: coreop is the int_arm_neon_vraddhn intrinsic itself.
2621 def : NarrowHighHalfPat<RADDHN2vvv_16b8h, v8i8, v8i16, int_arm_neon_vraddhn>;
2622 def : NarrowHighHalfPat<RADDHN2vvv_8h4s, v4i16, v4i32, int_arm_neon_vraddhn>;
2623 def : NarrowHighHalfPat<RADDHN2vvv_4s2d, v2i32, v2i64, int_arm_neon_vraddhn>;
// rsubhn2: coreop is the int_arm_neon_vrsubhn intrinsic itself.
2626 def : NarrowHighHalfPat<RSUBHN2vvv_16b8h, v8i8, v8i16, int_arm_neon_vrsubhn>;
2627 def : NarrowHighHalfPat<RSUBHN2vvv_8h4s, v4i16, v4i32, int_arm_neon_vrsubhn>;
2628 def : NarrowHighHalfPat<RSUBHN2vvv_4s2d, v2i32, v2i64, int_arm_neon_vrsubhn>;
2630 // patterns that need to extend the result
2631 class NeonI_3VDL_Ext<bit q, bit u, bits<2> size, bits<4> opcode,
2632 string asmop, string ResS, string OpS,
2633 SDPatternOperator opnode,
2634 RegisterOperand OpVPR,
2635 ValueType ResTy, ValueType OpTy, ValueType OpSTy>
2636 : NeonI_3VDiff<q, u, size, opcode,
2637 (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2638 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2639 [(set (ResTy VPR128:$Rd),
2640 (ResTy (zext (OpSTy (opnode (OpTy OpVPR:$Rn),
2641 (OpTy OpVPR:$Rm))))))],
2644 multiclass NeonI_3VDL_zext<bit u, bits<4> opcode,
2645 string asmop, SDPatternOperator opnode,
2648 let isCommutable = Commutable in {
2649 def _8h8b : NeonI_3VDL_Ext<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2650 opnode, VPR64, v8i16, v8i8, v8i8>;
2651 def _4s4h : NeonI_3VDL_Ext<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2652 opnode, VPR64, v4i32, v4i16, v4i16>;
2653 def _2d2s : NeonI_3VDL_Ext<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2654 opnode, VPR64, v2i64, v2i32, v2i32>;
// sabdl/uabdl: the int_arm_neon_vabds / int_arm_neon_vabdu result on the
// narrow operands is zero-extended (zext in NeonI_3VDL_Ext) to the wide
// result type; both forms are marked commutable.
2658 defm SABDLvvv : NeonI_3VDL_zext<0b0, 0b0111, "sabdl", int_arm_neon_vabds, 1>;
2659 defm UABDLvvv : NeonI_3VDL_zext<0b1, 0b0111, "uabdl", int_arm_neon_vabdu, 1>;
2661 multiclass NeonI_Op_High<SDPatternOperator op>
2663 def _16B : PatFrag<(ops node:$Rn, node:$Rm),
2664 (op (v8i8 (Neon_High16B node:$Rn)), (v8i8 (Neon_High16B node:$Rm)))>;
2665 def _8H : PatFrag<(ops node:$Rn, node:$Rm),
2666 (op (v4i16 (Neon_High8H node:$Rn)), (v4i16 (Neon_High8H node:$Rm)))>;
2667 def _4S : PatFrag<(ops node:$Rn, node:$Rm),
2668 (op (v2i32 (Neon_High4S node:$Rn)), (v2i32 (Neon_High4S node:$Rm)))>;
// "High half" PatFrag families. Each defm expands NeonI_Op_High into three
// fragments named <prefix>_16B, <prefix>_8H and <prefix>_4S, which apply the
// given intrinsic to Neon_High16B / Neon_High8H / Neon_High4S of both operands
// (typed v8i8 / v4i16 / v2i32 respectively). The "2" instruction variants
// below look these fragments up by name via !cast<PatFrag>(name # suffix).
2672 defm NI_sabdl_hi : NeonI_Op_High<int_arm_neon_vabds>;
2673 defm NI_uabdl_hi : NeonI_Op_High<int_arm_neon_vabdu>;
2674 defm NI_smull_hi : NeonI_Op_High<int_arm_neon_vmulls>;
2675 defm NI_umull_hi : NeonI_Op_High<int_arm_neon_vmullu>;
2676 defm NI_qdmull_hi : NeonI_Op_High<int_arm_neon_vqdmull>;
2677 defm NI_pmull_hi : NeonI_Op_High<int_arm_neon_vmullp>;
2679 multiclass NeonI_3VDL_Abd_u<bit u, bits<4> opcode,
2680 string asmop, string opnode,
2683 let isCommutable = Commutable in {
2684 def _8h8b : NeonI_3VDL_Ext<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2685 !cast<PatFrag>(opnode # "_16B"),
2686 VPR128, v8i16, v16i8, v8i8>;
2687 def _4s4h : NeonI_3VDL_Ext<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2688 !cast<PatFrag>(opnode # "_8H"),
2689 VPR128, v4i32, v8i16, v4i16>;
2690 def _2d2s : NeonI_3VDL_Ext<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2691 !cast<PatFrag>(opnode # "_4S"),
2692 VPR128, v2i64, v4i32, v2i32>;
// Second-half forms of SABDL / UABDL. The fourth argument is the *name* of a
// PatFrag family; NeonI_3VDL_Abd_u appends "_16B" / "_8H" / "_4S" and resolves
// it with !cast<PatFrag>. Operands are VPR128 and the assembly uses the
// 16b / 8h / 4s arrangements.
// NOTE: the generated records keep the _8h8b / _4s4h / _2d2s suffixes even
// though the source arrangement printed in the asm string is 16b / 8h / 4s.
2696 defm SABDL2vvv : NeonI_3VDL_Abd_u<0b0, 0b0111, "sabdl2", "NI_sabdl_hi", 1>;
2697 defm UABDL2vvv : NeonI_3VDL_Abd_u<0b1, 0b0111, "uabdl2", "NI_uabdl_hi", 1>;
2699 // For patterns that need two operators to be chained.
2700 class NeonI_3VDL_Aba<bit q, bit u, bits<2> size, bits<4> opcode,
2701 string asmop, string ResS, string OpS,
2702 SDPatternOperator opnode, SDPatternOperator subop,
2703 RegisterOperand OpVPR,
2704 ValueType ResTy, ValueType OpTy, ValueType OpSTy>
2705 : NeonI_3VDiff<q, u, size, opcode,
2706 (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
2707 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2708 [(set (ResTy VPR128:$Rd),
2710 (ResTy VPR128:$src),
2711 (ResTy (zext (OpSTy (subop (OpTy OpVPR:$Rn),
2712 (OpTy OpVPR:$Rm))))))))],
2714 let Constraints = "$src = $Rd";
2717 multiclass NeonI_3VDL_Aba_v1<bit u, bits<4> opcode,
2718 string asmop, SDPatternOperator opnode,
2719 SDPatternOperator subop>
2721 def _8h8b : NeonI_3VDL_Aba<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2722 opnode, subop, VPR64, v8i16, v8i8, v8i8>;
2723 def _4s4h : NeonI_3VDL_Aba<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2724 opnode, subop, VPR64, v4i32, v4i16, v4i16>;
2725 def _2d2s : NeonI_3VDL_Aba<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2726 opnode, subop, VPR64, v2i64, v2i32, v2i32>;
// Signed / unsigned absolute difference and accumulate long (opcode 0b0101).
// NeonI_3VDL_Aba_v1 produces the _8h8b, _4s4h and _2d2s records on VPR64
// sources, with the accumulator $src tied to $Rd. The arguments are the outer
// operator (add) and the inner intrinsic whose result is zero-extended before
// being combined with $src.
// NOTE(review): the pattern line that applies the outer operator is not
// visible in this excerpt; confirm against the NeonI_3VDL_Aba class.
2729 defm SABALvvv : NeonI_3VDL_Aba_v1<0b0, 0b0101, "sabal",
2730 add, int_arm_neon_vabds>;
2731 defm UABALvvv : NeonI_3VDL_Aba_v1<0b1, 0b0101, "uabal",
2732 add, int_arm_neon_vabdu>;
2734 multiclass NeonI_3VDL2_Aba_v1<bit u, bits<4> opcode,
2735 string asmop, SDPatternOperator opnode,
2738 def _8h8b : NeonI_3VDL_Aba<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2739 opnode, !cast<PatFrag>(subop # "_16B"),
2740 VPR128, v8i16, v16i8, v8i8>;
2741 def _4s4h : NeonI_3VDL_Aba<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2742 opnode, !cast<PatFrag>(subop # "_8H"),
2743 VPR128, v4i32, v8i16, v4i16>;
2744 def _2d2s : NeonI_3VDL_Aba<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2745 opnode, !cast<PatFrag>(subop # "_4S"),
2746 VPR128, v2i64, v4i32, v2i32>;
2749 defm SABAL2vvv : NeonI_3VDL2_Aba_v1<0b0, 0b0101, "sabal2", add,
2751 defm UABAL2vvv : NeonI_3VDL2_Aba_v1<0b1, 0b0101, "uabal2", add,
2754 // Long pattern with 2 operands
2755 multiclass NeonI_3VDL_2Op<bit u, bits<4> opcode,
2756 string asmop, SDPatternOperator opnode,
2759 let isCommutable = Commutable in {
2760 def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2761 opnode, VPR128, VPR64, v8i16, v8i8>;
2762 def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2763 opnode, VPR128, VPR64, v4i32, v4i16>;
2764 def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2765 opnode, VPR128, VPR64, v2i64, v2i32>;
// Signed / unsigned multiply long (opcode 0b1100). NeonI_3VDL_2Op instantiates
// NeonI_3VD_2Op (declared outside this excerpt) for the 8h/8b, 4s/4h and 2d/2s
// arrangements with a VPR128 result and VPR64 sources, matching the given
// intrinsic. The trailing 1 is forwarded to isCommutable.
2769 defm SMULLvvv : NeonI_3VDL_2Op<0b0, 0b1100, "smull", int_arm_neon_vmulls, 1>;
2770 defm UMULLvvv : NeonI_3VDL_2Op<0b1, 0b1100, "umull", int_arm_neon_vmullu, 1>;
2772 class NeonI_3VDL2_2Op_mull<bit q, bit u, bits<2> size, bits<4> opcode,
2773 string asmop, string ResS, string OpS,
2774 SDPatternOperator opnode,
2775 ValueType ResTy, ValueType OpTy>
2776 : NeonI_3VDiff<q, u, size, opcode,
2777 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
2778 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2779 [(set (ResTy VPR128:$Rd),
2780 (ResTy (opnode (OpTy VPR128:$Rn), (OpTy VPR128:$Rm))))],
2784 multiclass NeonI_3VDL2_2Op_mull_v1<bit u, bits<4> opcode,
2789 let isCommutable = Commutable in {
2790 def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2791 !cast<PatFrag>(opnode # "_16B"),
2793 def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2794 !cast<PatFrag>(opnode # "_8H"),
2796 def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2797 !cast<PatFrag>(opnode # "_4S"),
2802 defm SMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b0, 0b1100, "smull2",
2804 defm UMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b1, 0b1100, "umull2",
2807 // Long pattern with 3 operands
2808 class NeonI_3VDL_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
2809 string asmop, string ResS, string OpS,
2810 SDPatternOperator opnode,
2811 ValueType ResTy, ValueType OpTy>
2812 : NeonI_3VDiff<q, u, size, opcode,
2813 (outs VPR128:$Rd), (ins VPR128:$src, VPR64:$Rn, VPR64:$Rm),
2814 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2815 [(set (ResTy VPR128:$Rd),
2817 (ResTy VPR128:$src),
2818 (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))))],
2820 let Constraints = "$src = $Rd";
2823 multiclass NeonI_3VDL_3Op_v1<bit u, bits<4> opcode,
2824 string asmop, SDPatternOperator opnode>
2826 def _8h8b : NeonI_3VDL_3Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2827 opnode, v8i16, v8i8>;
2828 def _4s4h : NeonI_3VDL_3Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2829 opnode, v4i32, v4i16>;
2830 def _2d2s : NeonI_3VDL_3Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2831 opnode, v2i64, v2i32>;
2834 def Neon_smlal : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
2836 (int_arm_neon_vmulls node:$Rn, node:$Rm))>;
2838 def Neon_umlal : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
2840 (int_arm_neon_vmullu node:$Rn, node:$Rm))>;
2842 def Neon_smlsl : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
2844 (int_arm_neon_vmulls node:$Rn, node:$Rm))>;
2846 def Neon_umlsl : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
2848 (int_arm_neon_vmullu node:$Rn, node:$Rm))>;
// Multiply-accumulate long (opcode 0b1000) and multiply-subtract long
// (opcode 0b1010), signed (u = 0) and unsigned (u = 1). Each defm expands
// NeonI_3VDL_3Op_v1 into _8h8b, _4s4h and _2d2s records that take a tied
// VPR128 accumulator ($src = $Rd) plus two VPR64 sources and match the
// three-operand Neon_* PatFrag passed as the last argument.
2850 defm SMLALvvv : NeonI_3VDL_3Op_v1<0b0, 0b1000, "smlal", Neon_smlal>;
2851 defm UMLALvvv : NeonI_3VDL_3Op_v1<0b1, 0b1000, "umlal", Neon_umlal>;
2853 defm SMLSLvvv : NeonI_3VDL_3Op_v1<0b0, 0b1010, "smlsl", Neon_smlsl>;
2854 defm UMLSLvvv : NeonI_3VDL_3Op_v1<0b1, 0b1010, "umlsl", Neon_umlsl>;
2856 class NeonI_3VDL2_3Op_mlas<bit q, bit u, bits<2> size, bits<4> opcode,
2857 string asmop, string ResS, string OpS,
2858 SDPatternOperator subop, SDPatternOperator opnode,
2859 RegisterOperand OpVPR,
2860 ValueType ResTy, ValueType OpTy>
2861 : NeonI_3VDiff<q, u, size, opcode,
2862 (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
2863 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2864 [(set (ResTy VPR128:$Rd),
2866 (ResTy VPR128:$src),
2867 (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))))],
2869 let Constraints = "$src = $Rd";
2872 multiclass NeonI_3VDL2_3Op_mlas_v1<bit u, bits<4> opcode,
2874 SDPatternOperator subop,
2877 def _8h16b : NeonI_3VDL2_3Op_mlas<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2878 subop, !cast<PatFrag>(opnode # "_16B"),
2879 VPR128, v8i16, v16i8>;
2880 def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2881 subop, !cast<PatFrag>(opnode # "_8H"),
2882 VPR128, v4i32, v8i16>;
2883 def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2884 subop, !cast<PatFrag>(opnode # "_4S"),
2885 VPR128, v2i64, v4i32>;
// Second-half multiply-accumulate / multiply-subtract long.
// NeonI_3VDL2_3Op_mlas_v1 takes the accumulate operator (add or sub) and the
// *name* of a high-half multiply PatFrag family; it appends "_16B" / "_8H" /
// "_4S" and resolves the fragment with !cast<PatFrag>. The resulting
// _8h16b, _4s8h and _2d4s records use VPR128 sources with $src tied to $Rd.
2888 defm SMLAL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1000, "smlal2",
2889 add, "NI_smull_hi">;
2890 defm UMLAL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1000, "umlal2",
2891 add, "NI_umull_hi">;
2893 defm SMLSL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1010, "smlsl2",
2894 sub, "NI_smull_hi">;
2895 defm UMLSL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1010, "umlsl2",
2896 sub, "NI_umull_hi">;
2898 multiclass NeonI_3VDL_qdmlal_3Op_v2<bit u, bits<4> opcode,
2899 string asmop, SDPatternOperator opnode>
2901 def _4s4h : NeonI_3VDL2_3Op_mlas<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2902 opnode, int_arm_neon_vqdmull,
2903 VPR64, v4i32, v4i16>;
2904 def _2d2s : NeonI_3VDL2_3Op_mlas<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2905 opnode, int_arm_neon_vqdmull,
2906 VPR64, v2i64, v2i32>;
// Signed saturating doubling multiply-add / multiply-subtract long.
// NeonI_3VDL_qdmlal_3Op_v2 only defines the _4s4h and _2d2s records (there is
// no 8h/8b form). The multiply is int_arm_neon_vqdmull on VPR64 sources and
// the operator passed here (saturating add or saturating subtract intrinsic)
// is supplied as the outer operator of NeonI_3VDL2_3Op_mlas.
2909 defm SQDMLALvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1001, "sqdmlal",
2910 int_arm_neon_vqadds>;
2911 defm SQDMLSLvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1011, "sqdmlsl",
2912 int_arm_neon_vqsubs>;
2914 multiclass NeonI_3VDL_v2<bit u, bits<4> opcode,
2915 string asmop, SDPatternOperator opnode,
2918 let isCommutable = Commutable in {
2919 def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2920 opnode, VPR128, VPR64, v4i32, v4i16>;
2921 def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2922 opnode, VPR128, VPR64, v2i64, v2i32>;
// Signed saturating doubling multiply long (opcode 0b1101). NeonI_3VDL_v2
// defines only the _4s4h and _2d2s records, matching int_arm_neon_vqdmull on
// VPR64 sources; the trailing 1 is forwarded to isCommutable.
2926 defm SQDMULLvvv : NeonI_3VDL_v2<0b0, 0b1101, "sqdmull",
2927 int_arm_neon_vqdmull, 1>;
2929 multiclass NeonI_3VDL2_2Op_mull_v2<bit u, bits<4> opcode,
2934 let isCommutable = Commutable in {
2935 def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2936 !cast<PatFrag>(opnode # "_8H"),
2938 def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2939 !cast<PatFrag>(opnode # "_4S"),
2944 defm SQDMULL2vvv : NeonI_3VDL2_2Op_mull_v2<0b0, 0b1101, "sqdmull2",
2947 multiclass NeonI_3VDL2_3Op_qdmlal_v2<bit u, bits<4> opcode,
2949 SDPatternOperator opnode>
2951 def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2952 opnode, NI_qdmull_hi_8H,
2953 VPR128, v4i32, v8i16>;
2954 def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2955 opnode, NI_qdmull_hi_4S,
2956 VPR128, v2i64, v4i32>;
// Second-half forms of SQDMLAL / SQDMLSL. NeonI_3VDL2_3Op_qdmlal_v2 defines
// the _4s8h and _2d4s records on VPR128 sources, using NI_qdmull_hi_8H and
// NI_qdmull_hi_4S as the multiply and the intrinsic passed here (saturating
// add or saturating subtract) as the outer operator.
2959 defm SQDMLAL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1001, "sqdmlal2",
2960 int_arm_neon_vqadds>;
2961 defm SQDMLSL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1011, "sqdmlsl2",
2962 int_arm_neon_vqsubs>;
2964 multiclass NeonI_3VDL_v3<bit u, bits<4> opcode,
2965 string asmop, SDPatternOperator opnode,
2968 let isCommutable = Commutable in {
2969 def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2970 opnode, VPR128, VPR64, v8i16, v8i8>;
// Polynomial multiply long (opcode 0b1110). NeonI_3VDL_v3 defines only the
// _8h8b record, matching int_arm_neon_vmullp on VPR64 sources; the trailing 1
// is forwarded to isCommutable.
2974 defm PMULLvvv : NeonI_3VDL_v3<0b0, 0b1110, "pmull", int_arm_neon_vmullp, 1>;
2976 multiclass NeonI_3VDL2_2Op_mull_v3<bit u, bits<4> opcode,
2981 let isCommutable = Commutable in {
2982 def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2983 !cast<PatFrag>(opnode # "_16B"),
2988 defm PMULL2vvv : NeonI_3VDL2_2Op_mull_v3<0b0, 0b1110, "pmull2",
2991 // End of implementation for instruction class (3V Diff)
2993 // The following are the vector load/store multiple N-element structure
2994 // instructions (class SIMD lselem).
2996 // ld1: load multiple 1-element structure to 1/2/3/4 registers.
2997 // ld2/ld3/ld4: load multiple N-element structure to N registers (N = 2, 3, 4).
2998 // The structure consists of a sequence of sets of N values.
2999 // The first element of the structure is placed in the first lane
3000 // of the first vector, the second element in the first lane
3001 // of the second vector, and so on.
3002 // E.g. LD1_3V_2S will load 32-bit elements {A, B, C, D, E, F} sequentially into
3003 // the three 64-bit vectors list {BA, DC, FE}.
3004 // E.g. LD3_2S will load 32-bit elements {A, B, C, D, E, F} into the three
3005 // 64-bit vectors list {DA, EB, FC}.
3006 // Store instructions store multiple structures from N registers, mirroring the loads.
3009 class NeonI_LDVList<bit q, bits<4> opcode, bits<2> size,
3010 RegisterOperand VecList, string asmop>
3011 : NeonI_LdStMult<q, 1, opcode, size,
3012 (outs VecList:$Rt), (ins GPR64xsp:$Rn),
3013 asmop # "\t$Rt, [$Rn]",
3017 let neverHasSideEffects = 1;
3020 multiclass LDVList_BHSD<bits<4> opcode, string List, string asmop> {
3021 def _8B : NeonI_LDVList<0, opcode, 0b00,
3022 !cast<RegisterOperand>(List # "8B_operand"), asmop>;
3024 def _4H : NeonI_LDVList<0, opcode, 0b01,
3025 !cast<RegisterOperand>(List # "4H_operand"), asmop>;
3027 def _2S : NeonI_LDVList<0, opcode, 0b10,
3028 !cast<RegisterOperand>(List # "2S_operand"), asmop>;
3030 def _16B : NeonI_LDVList<1, opcode, 0b00,
3031 !cast<RegisterOperand>(List # "16B_operand"), asmop>;
3033 def _8H : NeonI_LDVList<1, opcode, 0b01,
3034 !cast<RegisterOperand>(List # "8H_operand"), asmop>;
3036 def _4S : NeonI_LDVList<1, opcode, 0b10,
3037 !cast<RegisterOperand>(List # "4S_operand"), asmop>;
3039 def _2D : NeonI_LDVList<1, opcode, 0b11,
3040 !cast<RegisterOperand>(List # "2D_operand"), asmop>;
3043 // Load multiple N-element structure to N consecutive registers (N = 1,2,3,4)
// LDVList_BHSD expands to the _8B, _4H, _2S (q = 0) and _16B, _8H, _4S, _2D
// (q = 1) records, building the register-list operand name as
// List # "<arrangement>_operand". The multiclass has no 1D member, so the
// 64-bit 1D form of ld1 is defined separately with q = 0 and size = 0b11.
// The opcode field selects the instruction: 0b0111 ld1, 0b1000 ld2,
// 0b0100 ld3, 0b0000 ld4.
3044 defm LD1 : LDVList_BHSD<0b0111, "VOne", "ld1">;
3045 def LD1_1D : NeonI_LDVList<0, 0b0111, 0b11, VOne1D_operand, "ld1">;
3047 defm LD2 : LDVList_BHSD<0b1000, "VPair", "ld2">;
3049 defm LD3 : LDVList_BHSD<0b0100, "VTriple", "ld3">;
3051 defm LD4 : LDVList_BHSD<0b0000, "VQuad", "ld4">;
3053 // Load multiple 1-element structure to N consecutive registers (N = 2,3,4)
// Multi-register ld1 forms. They share the "ld1" mnemonic and differ only in
// the opcode field and register-list operand: 0b1010 with VPair, 0b0110 with
// VTriple, 0b0010 with VQuad. Each defm supplies the B/H/S/D arrangements from
// LDVList_BHSD; the accompanying def adds the 1D arrangement (q = 0,
// size = 0b11), which the multiclass does not generate.
3054 defm LD1_2V : LDVList_BHSD<0b1010, "VPair", "ld1">;
3055 def LD1_2V_1D : NeonI_LDVList<0, 0b1010, 0b11, VPair1D_operand, "ld1">;
3057 defm LD1_3V : LDVList_BHSD<0b0110, "VTriple", "ld1">;
3058 def LD1_3V_1D : NeonI_LDVList<0, 0b0110, 0b11, VTriple1D_operand, "ld1">;
3060 defm LD1_4V : LDVList_BHSD<0b0010, "VQuad", "ld1">;
3061 def LD1_4V_1D : NeonI_LDVList<0, 0b0010, 0b11, VQuad1D_operand, "ld1">;
3063 class NeonI_STVList<bit q, bits<4> opcode, bits<2> size,
3064 RegisterOperand VecList, string asmop>
3065 : NeonI_LdStMult<q, 0, opcode, size,
3066 (outs), (ins GPR64xsp:$Rn, VecList:$Rt),
3067 asmop # "\t$Rt, [$Rn]",
3071 let neverHasSideEffects = 1;
3074 multiclass STVList_BHSD<bits<4> opcode, string List, string asmop> {
3075 def _8B : NeonI_STVList<0, opcode, 0b00,
3076 !cast<RegisterOperand>(List # "8B_operand"), asmop>;
3078 def _4H : NeonI_STVList<0, opcode, 0b01,
3079 !cast<RegisterOperand>(List # "4H_operand"), asmop>;
3081 def _2S : NeonI_STVList<0, opcode, 0b10,
3082 !cast<RegisterOperand>(List # "2S_operand"), asmop>;
3084 def _16B : NeonI_STVList<1, opcode, 0b00,
3085 !cast<RegisterOperand>(List # "16B_operand"), asmop>;
3087 def _8H : NeonI_STVList<1, opcode, 0b01,
3088 !cast<RegisterOperand>(List # "8H_operand"), asmop>;
3090 def _4S : NeonI_STVList<1, opcode, 0b10,
3091 !cast<RegisterOperand>(List # "4S_operand"), asmop>;
3093 def _2D : NeonI_STVList<1, opcode, 0b11,
3094 !cast<RegisterOperand>(List # "2D_operand"), asmop>;
3097 // Store multiple N-element structures from N registers (N = 1,2,3,4)
// STVList_BHSD expands to the _8B, _4H, _2S (q = 0) and _16B, _8H, _4S, _2D
// (q = 1) records, building the register-list operand name as
// List # "<arrangement>_operand". The 1D form of st1 is not part of the
// multiclass and is defined separately with q = 0 and size = 0b11.
// Opcode values mirror the loads: 0b0111 st1, 0b1000 st2, 0b0100 st3,
// 0b0000 st4.
3098 defm ST1 : STVList_BHSD<0b0111, "VOne", "st1">;
3099 def ST1_1D : NeonI_STVList<0, 0b0111, 0b11, VOne1D_operand, "st1">;
3101 defm ST2 : STVList_BHSD<0b1000, "VPair", "st2">;
3103 defm ST3 : STVList_BHSD<0b0100, "VTriple", "st3">;
3105 defm ST4 : STVList_BHSD<0b0000, "VQuad", "st4">;
3107 // Store multiple 1-element structures from N consecutive registers (N = 2,3,4)
// Multi-register st1 forms. They share the "st1" mnemonic and differ only in
// the opcode field and register-list operand: 0b1010 with VPair, 0b0110 with
// VTriple, 0b0010 with VQuad. Each defm supplies the B/H/S/D arrangements from
// STVList_BHSD; the accompanying def adds the 1D arrangement (q = 0,
// size = 0b11), which the multiclass does not generate.
3108 defm ST1_2V : STVList_BHSD<0b1010, "VPair", "st1">;
3109 def ST1_2V_1D : NeonI_STVList<0, 0b1010, 0b11, VPair1D_operand, "st1">;
3111 defm ST1_3V : STVList_BHSD<0b0110, "VTriple", "st1">;
3112 def ST1_3V_1D : NeonI_STVList<0, 0b0110, 0b11, VTriple1D_operand, "st1">;
3114 defm ST1_4V : STVList_BHSD<0b0010, "VQuad", "st1">;
3115 def ST1_4V_1D : NeonI_STVList<0, 0b0010, 0b11, VQuad1D_operand, "st1">;
3117 // End of vector load/store multiple N-element structure (class SIMD lselem)
3119 // Scalar Three Same
3121 class NeonI_Scalar3Same_D_size<bit u, bits<5> opcode, string asmop>
3122 : NeonI_Scalar3Same<u, 0b11, opcode,
3123 (outs FPR64:$Rd), (ins FPR64:$Rn, FPR64:$Rm),
3124 !strconcat(asmop, " $Rd, $Rn, $Rm"),
3128 multiclass NeonI_Scalar3Same_HS_sizes<bit u, bits<5> opcode,
3129 string asmop, bit Commutable = 0>
3131 let isCommutable = Commutable in {
3132 def hhh : NeonI_Scalar3Same<u, 0b01, opcode,
3133 (outs FPR16:$Rd), (ins FPR16:$Rn, FPR16:$Rm),
3134 !strconcat(asmop, " $Rd, $Rn, $Rm"),
3137 def sss : NeonI_Scalar3Same<u, 0b10, opcode,
3138 (outs FPR32:$Rd), (ins FPR32:$Rn, FPR32:$Rm),
3139 !strconcat(asmop, " $Rd, $Rn, $Rm"),
3145 multiclass NeonI_Scalar3Same_SD_sizes<bit u, bit size_high, bits<5> opcode,
3146 string asmop, bit Commutable = 0>
3148 let isCommutable = Commutable in {
3149 def sss : NeonI_Scalar3Same<u, {size_high, 0b0}, opcode,
3150 (outs FPR32:$Rd), (ins FPR32:$Rn, FPR32:$Rm),
3151 !strconcat(asmop, " $Rd, $Rn, $Rm"),
3154 def ddd : NeonI_Scalar3Same<u, {size_high, 0b1}, opcode,
3155 (outs FPR64:$Rd), (ins FPR64:$Rn, FPR64:$Rm),
3156 !strconcat(asmop, " $Rd, $Rn, $Rm"),
3162 multiclass NeonI_Scalar3Same_BHSD_sizes<bit u, bits<5> opcode,
3163 string asmop, bit Commutable = 0>
3165 let isCommutable = Commutable in {
3166 def bbb : NeonI_Scalar3Same<u, 0b00, opcode,
3167 (outs FPR8:$Rd), (ins FPR8:$Rn, FPR8:$Rm),
3168 !strconcat(asmop, " $Rd, $Rn, $Rm"),
3171 def hhh : NeonI_Scalar3Same<u, 0b01, opcode,
3172 (outs FPR16:$Rd), (ins FPR16:$Rn, FPR16:$Rm),
3173 !strconcat(asmop, " $Rd, $Rn, $Rm"),
3176 def sss : NeonI_Scalar3Same<u, 0b10, opcode,
3177 (outs FPR32:$Rd), (ins FPR32:$Rn, FPR32:$Rm),
3178 !strconcat(asmop, " $Rd, $Rn, $Rm"),
3181 def ddd : NeonI_Scalar3Same<u, 0b11, opcode,
3182 (outs FPR64:$Rd), (ins FPR64:$Rn, FPR64:$Rm),
3183 !strconcat(asmop, " $Rd, $Rn, $Rm"),
3189 multiclass Neon_Scalar3Same_D_size_patterns<SDPatternOperator opnode,
3190 Instruction INSTD> {
3191 def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
3192 (INSTD FPR64:$Rn, FPR64:$Rm)>;
3195 multiclass Neon_Scalar3Same_BHSD_size_patterns<SDPatternOperator opnode,
3200 : Neon_Scalar3Same_D_size_patterns<opnode, INSTD> {
3201 def: Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))),
3202 (INSTB FPR8:$Rn, FPR8:$Rm)>;
3204 def: Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
3205 (INSTH FPR16:$Rn, FPR16:$Rm)>;
3207 def: Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
3208 (INSTS FPR32:$Rn, FPR32:$Rm)>;
3211 class Neon_Scalar3Same_cmp_D_size_patterns<SDPatternOperator opnode,
3213 : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
3214 (INSTD FPR64:$Rn, FPR64:$Rm)>;
3216 multiclass Neon_Scalar3Same_HS_size_patterns<SDPatternOperator opnode,
3218 Instruction INSTS> {
3219 def : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
3220 (INSTH FPR16:$Rn, FPR16:$Rm)>;
3221 def : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
3222 (INSTS FPR32:$Rn, FPR32:$Rm)>;
3225 multiclass Neon_Scalar3Same_SD_size_patterns<SDPatternOperator opnode,
3227 Instruction INSTD> {
3228 def : Pat<(v1f32 (opnode (v1f32 FPR32:$Rn), (v1f32 FPR32:$Rm))),
3229 (INSTS FPR32:$Rn, FPR32:$Rm)>;
3230 def : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
3231 (INSTD FPR64:$Rn, FPR64:$Rm)>;
3234 multiclass Neon_Scalar3Same_cmp_SD_size_patterns<SDPatternOperator opnode,
3236 Instruction INSTD> {
3237 def : Pat<(v1i32 (opnode (v1f32 FPR32:$Rn), (v1f32 FPR32:$Rm))),
3238 (INSTS FPR32:$Rn, FPR32:$Rm)>;
3239 def : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
3240 (INSTD FPR64:$Rn, FPR64:$Rm)>;
3243 // Scalar Three Different
3245 multiclass NeonI_Scalar3Diff_HS_size<bit u, bits<4> opcode, string asmop> {
3246 def shh : NeonI_Scalar3Diff<u, 0b01, opcode,
3247 (outs FPR32:$Rd), (ins FPR16:$Rn, FPR16:$Rm),
3248 !strconcat(asmop, " $Rd, $Rn, $Rm"),
3251 def dss : NeonI_Scalar3Diff<u, 0b10, opcode,
3252 (outs FPR64:$Rd), (ins FPR32:$Rn, FPR32:$Rm),
3253 !strconcat(asmop, " $Rd, $Rn, $Rm"),
3258 multiclass NeonI_Scalar3Diff_ml_HS_size<bit u, bits<4> opcode, string asmop> {
3259 let Constraints = "$Src = $Rd" in {
3260 def shh : NeonI_Scalar3Diff<u, 0b01, opcode,
3261 (outs FPR32:$Rd), (ins FPR32:$Src, FPR16:$Rn, FPR16:$Rm),
3262 !strconcat(asmop, " $Rd, $Rn, $Rm"),
3265 def dss : NeonI_Scalar3Diff<u, 0b10, opcode,
3266 (outs FPR64:$Rd), (ins FPR64:$Src, FPR32:$Rn, FPR32:$Rm),
3267 !strconcat(asmop, " $Rd, $Rn, $Rm"),
3273 multiclass Neon_Scalar3Diff_HS_size_patterns<SDPatternOperator opnode,
3275 Instruction INSTS> {
3276 def : Pat<(v1i32 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
3277 (INSTH FPR16:$Rn, FPR16:$Rm)>;
3278 def : Pat<(v1i64 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
3279 (INSTS FPR32:$Rn, FPR32:$Rm)>;
3282 multiclass Neon_Scalar3Diff_ml_HS_size_patterns<SDPatternOperator opnode,
3284 Instruction INSTS> {
3285 def : Pat<(v1i32 (opnode (v1i32 FPR32:$Src), (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
3286 (INSTH FPR32:$Src, FPR16:$Rn, FPR16:$Rm)>;
3287 def : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
3288 (INSTS FPR64:$Src, FPR32:$Rn, FPR32:$Rm)>;
3291 // Scalar Two Registers Miscellaneous
3293 multiclass NeonI_Scalar2SameMisc_SD_size<bit u, bit size_high, bits<5> opcode,
3295 def ss : NeonI_Scalar2SameMisc<u, {size_high, 0b0}, opcode,
3296 (outs FPR32:$Rd), (ins FPR32:$Rn),
3297 !strconcat(asmop, " $Rd, $Rn"),
3299 def dd : NeonI_Scalar2SameMisc<u, {size_high, 0b1}, opcode,
3300 (outs FPR64:$Rd), (ins FPR64:$Rn),
3301 !strconcat(asmop, " $Rd, $Rn"),
3305 multiclass NeonI_Scalar2SameMisc_D_size<bit u, bits<5> opcode, string asmop> {
3306 def dd: NeonI_Scalar2SameMisc<u, 0b11, opcode,
3307 (outs FPR64:$Rd), (ins FPR64:$Rn),
3308 !strconcat(asmop, " $Rd, $Rn"),
3312 multiclass NeonI_Scalar2SameMisc_BHSD_size<bit u, bits<5> opcode, string asmop>
3313 : NeonI_Scalar2SameMisc_D_size<u, opcode, asmop> {
3314 def bb : NeonI_Scalar2SameMisc<u, 0b00, opcode,
3315 (outs FPR8:$Rd), (ins FPR8:$Rn),
3316 !strconcat(asmop, " $Rd, $Rn"),
3318 def hh : NeonI_Scalar2SameMisc<u, 0b01, opcode,
3319 (outs FPR16:$Rd), (ins FPR16:$Rn),
3320 !strconcat(asmop, " $Rd, $Rn"),
3322 def ss : NeonI_Scalar2SameMisc<u, 0b10, opcode,
3323 (outs FPR32:$Rd), (ins FPR32:$Rn),
3324 !strconcat(asmop, " $Rd, $Rn"),
3328 multiclass NeonI_Scalar2SameMisc_narrow_HSD_size<bit u, bits<5> opcode,
3330 def bh : NeonI_Scalar2SameMisc<u, 0b00, opcode,
3331 (outs FPR8:$Rd), (ins FPR16:$Rn),
3332 !strconcat(asmop, " $Rd, $Rn"),
3334 def hs : NeonI_Scalar2SameMisc<u, 0b01, opcode,
3335 (outs FPR16:$Rd), (ins FPR32:$Rn),
3336 !strconcat(asmop, " $Rd, $Rn"),
3338 def sd : NeonI_Scalar2SameMisc<u, 0b10, opcode,
3339 (outs FPR32:$Rd), (ins FPR64:$Rn),
3340 !strconcat(asmop, " $Rd, $Rn"),
3344 multiclass NeonI_Scalar2SameMisc_accum_BHSD_size<bit u, bits<5> opcode,
3347 let Constraints = "$Src = $Rd" in {
3348 def bb : NeonI_Scalar2SameMisc<u, 0b00, opcode,
3349 (outs FPR8:$Rd), (ins FPR8:$Src, FPR8:$Rn),
3350 !strconcat(asmop, " $Rd, $Rn"),
3352 def hh : NeonI_Scalar2SameMisc<u, 0b01, opcode,
3353 (outs FPR16:$Rd), (ins FPR16:$Src, FPR16:$Rn),
3354 !strconcat(asmop, " $Rd, $Rn"),
3356 def ss : NeonI_Scalar2SameMisc<u, 0b10, opcode,
3357 (outs FPR32:$Rd), (ins FPR32:$Src, FPR32:$Rn),
3358 !strconcat(asmop, " $Rd, $Rn"),
3360 def dd: NeonI_Scalar2SameMisc<u, 0b11, opcode,
3361 (outs FPR64:$Rd), (ins FPR64:$Src, FPR64:$Rn),
3362 !strconcat(asmop, " $Rd, $Rn"),
3367 multiclass Neon_Scalar2SameMisc_cvt_SD_size_patterns<SDPatternOperator Sopnode,
3368 SDPatternOperator Dopnode,
3370 Instruction INSTD> {
3371 def : Pat<(v1f32 (Sopnode (v1i32 FPR32:$Rn))),
3373 def : Pat<(v1f64 (Dopnode (v1i64 FPR64:$Rn))),
3377 multiclass Neon_Scalar2SameMisc_SD_size_patterns<SDPatternOperator opnode,
3379 Instruction INSTD> {
3380 def : Pat<(v1f32 (opnode (v1f32 FPR32:$Rn))),
3382 def : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))),
3386 class NeonI_Scalar2SameMisc_cmpz_D_size<bit u, bits<5> opcode, string asmop>
3387 : NeonI_Scalar2SameMisc<u, 0b11, opcode,
3388 (outs FPR64:$Rd), (ins FPR64:$Rn, neon_uimm0:$Imm),
3389 !strconcat(asmop, " $Rd, $Rn, $Imm"),
3393 multiclass NeonI_Scalar2SameMisc_cmpz_SD_size<bit u, bits<5> opcode,
3395 def ssi : NeonI_Scalar2SameMisc<u, 0b10, opcode,
3396 (outs FPR32:$Rd), (ins FPR32:$Rn, fpz32:$FPImm),
3397 !strconcat(asmop, " $Rd, $Rn, $FPImm"),
3400 def ddi : NeonI_Scalar2SameMisc<u, 0b11, opcode,
3401 (outs FPR64:$Rd), (ins FPR64:$Rn, fpz64movi:$FPImm),
3402 !strconcat(asmop, " $Rd, $Rn, $FPImm"),
3407 class Neon_Scalar2SameMisc_cmpz_D_size_patterns<SDPatternOperator opnode,
3409 : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn),
3410 (v1i64 (bitconvert (v8i8 Neon_immAllZeros))))),
3411 (INSTD FPR64:$Rn, 0)>;
3413 multiclass Neon_Scalar2SameMisc_cmpz_SD_size_patterns<SDPatternOperator opnode,
3415 Instruction INSTD> {
3416 def : Pat<(v1i32 (opnode (v1f32 FPR32:$Rn),
3417 (v1f32 (scalar_to_vector (f32 fpimm:$FPImm))))),
3418 (INSTS FPR32:$Rn, fpimm:$FPImm)>;
3419 def : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn),
3420 (v1f64 (bitconvert (v8i8 Neon_immAllZeros))))),
3421 (INSTD FPR64:$Rn, 0)>;
3424 multiclass Neon_Scalar2SameMisc_D_size_patterns<SDPatternOperator opnode,
3425 Instruction INSTD> {
3426 def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn))),
3430 multiclass Neon_Scalar2SameMisc_BHSD_size_patterns<SDPatternOperator opnode,
3435 : Neon_Scalar2SameMisc_D_size_patterns<opnode, INSTD> {
3436 def : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn))),
3438 def : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn))),
3440 def : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn))),
3444 multiclass Neon_Scalar2SameMisc_narrow_HSD_size_patterns<
3445 SDPatternOperator opnode,
3448 Instruction INSTD> {
3449 def : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn))),
3451 def : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn))),
3453 def : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn))),
3458 multiclass Neon_Scalar2SameMisc_accum_BHSD_size_patterns<
3459 SDPatternOperator opnode,
3463 Instruction INSTD> {
3464 def : Pat<(v1i8 (opnode (v1i8 FPR8:$Src), (v1i8 FPR8:$Rn))),
3465 (INSTB FPR8:$Src, FPR8:$Rn)>;
3466 def : Pat<(v1i16 (opnode (v1i16 FPR16:$Src), (v1i16 FPR16:$Rn))),
3467 (INSTH FPR16:$Src, FPR16:$Rn)>;
3468 def : Pat<(v1i32 (opnode (v1i32 FPR32:$Src), (v1i32 FPR32:$Rn))),
3469 (INSTS FPR32:$Src, FPR32:$Rn)>;
3470 def : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn))),
3471 (INSTD FPR64:$Src, FPR64:$Rn)>;
3474 // Scalar Integer Add
3475 let isCommutable = 1 in {
3476 def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">;
3479 // Scalar Integer Sub
// 64-bit scalar subtract: same opcode (0b10000) as the scalar add, with u = 1.
3480 def SUBddd : NeonI_Scalar3Same_D_size<0b1, 0b10000, "sub">;
3482 // Pattern for Scalar Integer Add and Sub with D register only
// Generic add / sub on v1i64 select the D-register instructions.
3483 defm : Neon_Scalar3Same_D_size_patterns<add, ADDddd>;
3484 defm : Neon_Scalar3Same_D_size_patterns<sub, SUBddd>;
3486 // Patterns to match llvm.aarch64.* intrinsic for Scalar Add, Sub
// The signed and unsigned intrinsic variants select the same instruction.
3487 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vaddds, ADDddd>;
3488 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vadddu, ADDddd>;
3489 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vsubds, SUBddd>;
3490 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vsubdu, SUBddd>;
3492 // Scalar Integer Saturating Add (Signed, Unsigned)
// NeonI_Scalar3Same_BHSD_sizes produces the bbb, hhh, sss and ddd records
// (FPR8 / FPR16 / FPR32 / FPR64). The final argument is the Commutable flag:
// 1 for the saturating adds, 0 for the saturating subtracts.
3493 defm SQADD : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00001, "sqadd", 1>;
3494 defm UQADD : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00001, "uqadd", 1>;
3496 // Scalar Integer Saturating Sub (Signed, Unsigned)
3497 defm SQSUB : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00101, "sqsub", 0>;
3498 defm UQSUB : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00101, "uqsub", 0>;
3500 // Patterns to match llvm.arm.* intrinsic for
3501 // Scalar Integer Saturating Add, Sub (Signed, Unsigned)
// llvm.arm.* intrinsics are only mapped for the v1i64 (D register) form.
3502 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqadds, SQADDddd>;
3503 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqaddu, UQADDddd>;
3504 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqsubs, SQSUBddd>;
3505 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqsubu, UQSUBddd>;
3507 // Patterns to match llvm.aarch64.* intrinsic for
3508 // Scalar Integer Saturating Add, Sub (Signed, Unsigned)
// Neon_Scalar3Same_BHSD_size_patterns adds v1i8 / v1i16 / v1i32 patterns and
// inherits the v1i64 pattern from Neon_Scalar3Same_D_size_patterns, so all
// four instruction records are passed in B, H, S, D order.
3509 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqadds, SQADDbbb,
3510 SQADDhhh, SQADDsss, SQADDddd>;
3511 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqaddu, UQADDbbb,
3512 UQADDhhh, UQADDsss, UQADDddd>;
3513 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqsubs, SQSUBbbb,
3514 SQSUBhhh, SQSUBsss, SQSUBddd>;
3515 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqsubu, UQSUBbbb,
3516 UQSUBhhh, UQSUBsss, UQSUBddd>;
3518 // Scalar Integer Saturating Doubling Multiply Half High
// NeonI_Scalar3Same_HS_sizes produces only the hhh (size 0b01, FPR16) and
// sss (size 0b10, FPR32) records. Both instructions share opcode 0b10110 and
// differ in the u bit; the trailing 1 marks them commutable.
3519 defm SQDMULH : NeonI_Scalar3Same_HS_sizes<0b0, 0b10110, "sqdmulh", 1>;
3521 // Scalar Integer Saturating Rounding Doubling Multiply Half High
3522 defm SQRDMULH : NeonI_Scalar3Same_HS_sizes<0b1, 0b10110, "sqrdmulh", 1>;
3524 // Patterns to match llvm.arm.* intrinsic for
3525 // Scalar Integer Saturating Doubling Multiply Half High and
3526 // Scalar Integer Saturating Rounding Doubling Multiply Half High
3527 defm : Neon_Scalar3Same_HS_size_patterns<int_arm_neon_vqdmulh, SQDMULHhhh,
3529 defm : Neon_Scalar3Same_HS_size_patterns<int_arm_neon_vqrdmulh, SQRDMULHhhh,
3532 // Scalar Floating-point Multiply Extended
// NeonI_Scalar3Same_SD_sizes produces the sss and ddd records. Its second
// argument is size_high: the size field is built as {size_high, 0} for sss
// and {size_high, 1} for ddd. FRECPS and FRSQRTS share opcode 0b11111 and are
// distinguished by size_high (0 vs 1). Only FMULX is marked commutable.
3533 defm FMULX : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11011, "fmulx", 1>;
3535 // Scalar Floating-point Reciprocal Step
3536 defm FRECPS : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11111, "frecps", 0>;
3538 // Scalar Floating-point Reciprocal Square Root Step
3539 defm FRSQRTS : NeonI_Scalar3Same_SD_sizes<0b0, 0b1, 0b11111, "frsqrts", 0>;
3541 // Patterns to match llvm.arm.* intrinsic for
3542 // Scalar Floating-point Reciprocal Step and
3543 // Scalar Floating-point Reciprocal Square Root Step
3544 defm : Neon_Scalar3Same_SD_size_patterns<int_arm_neon_vrecps, FRECPSsss,
3546 defm : Neon_Scalar3Same_SD_size_patterns<int_arm_neon_vrsqrts, FRSQRTSsss,
3549 // Patterns to match llvm.aarch64.* intrinsic for
3550 // Scalar Floating-point Multiply Extended
3551 defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_vmulx, FMULXsss,
3554 // Scalar Integer Shift Left (Signed, Unsigned)
// D-register only shift-left instructions; signed and unsigned forms share
// opcode 0b01000 and differ in the u bit.
3555 def SSHLddd : NeonI_Scalar3Same_D_size<0b0, 0b01000, "sshl">;
3556 def USHLddd : NeonI_Scalar3Same_D_size<0b1, 0b01000, "ushl">;
3558 // Patterns to match llvm.arm.* intrinsic for
3559 // Scalar Integer Shift Left (Signed, Unsigned)
3560 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vshifts, SSHLddd>;
3561 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vshiftu, USHLddd>;
3563 // Patterns to match llvm.aarch64.* intrinsic for
3564 // Scalar Integer Shift Left (Signed, Unsigned)
// Both intrinsic families select the same pair of instructions.
3565 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vshlds, SSHLddd>;
3566 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vshldu, USHLddd>;
3568 // Scalar Integer Saturating Shift Left (Signed, Unsigned)
// Saturating shift left in all four scalar sizes (bbb, hhh, sss, ddd records
// from NeonI_Scalar3Same_BHSD_sizes); the trailing 0 leaves them
// non-commutable.
3569 defm SQSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01001, "sqshl", 0>;
3570 defm UQSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01001, "uqshl", 0>;
3572 // Patterns to match llvm.aarch64.* intrinsic for
3573 // Scalar Integer Saturating Shift Left (Signed, Unsigned)
// The aarch64 intrinsics are mapped for every element size; instruction
// records are passed in B, H, S, D order.
3574 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqshls, SQSHLbbb,
3575 SQSHLhhh, SQSHLsss, SQSHLddd>;
3576 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqshlu, UQSHLbbb,
3577 UQSHLhhh, UQSHLsss, UQSHLddd>;
3579 // Patterns to match llvm.arm.* intrinsic for
3580 // Scalar Integer Saturating Shift Left (Signed, Unsigned)
// The arm intrinsics are only mapped for the v1i64 (D register) form.
3581 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqshifts, SQSHLddd>;
3582 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqshiftu, UQSHLddd>;
3584 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
// D-register only rounding shift-left instructions; signed and unsigned forms
// share opcode 0b01010 and differ in the u bit.
3585 def SRSHLddd: NeonI_Scalar3Same_D_size<0b0, 0b01010, "srshl">;
3586 def URSHLddd: NeonI_Scalar3Same_D_size<0b1, 0b01010, "urshl">;
3588 // Patterns to match llvm.aarch64.* intrinsic for
3589 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
3590 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vrshlds, SRSHLddd>;
3591 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vrshldu, URSHLddd>;
3593 // Patterns to match llvm.arm.* intrinsic for
3594 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
// Both intrinsic families select the same pair of instructions.
3595 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vrshifts, SRSHLddd>;
3596 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vrshiftu, URSHLddd>;
3598 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
3599 defm SQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01011, "sqrshl", 0>;
3600 defm UQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01011, "uqrshl", 0>;
3602 // Patterns to match llvm.aarch64.* intrinsic for
3603 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
3604 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqrshls, SQRSHLbbb,
3605 SQRSHLhhh, SQRSHLsss, SQRSHLddd>;
3606 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqrshlu, UQRSHLbbb,
3607 UQRSHLhhh, UQRSHLsss, UQRSHLddd>;
3609 // Patterns to match llvm.arm.* intrinsic for
3610 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
3611 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshifts, SQRSHLddd>;
3612 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshiftu, UQRSHLddd>;
3614 // Signed Saturating Doubling Multiply-Add Long
3615 defm SQDMLAL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1001, "sqdmlal">;
3616 defm : Neon_Scalar3Diff_ml_HS_size_patterns<int_aarch64_neon_vqdmlal,
3617 SQDMLALshh, SQDMLALdss>;
3619 // Signed Saturating Doubling Multiply-Subtract Long
3620 defm SQDMLSL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1011, "sqdmlsl">;
3621 defm : Neon_Scalar3Diff_ml_HS_size_patterns<int_aarch64_neon_vqdmlsl,
3622 SQDMLSLshh, SQDMLSLdss>;
3624 // Signed Saturating Doubling Multiply Long
3625 defm SQDMULL : NeonI_Scalar3Diff_HS_size<0b0, 0b1101, "sqdmull">;
3626 defm : Neon_Scalar3Diff_HS_size_patterns<int_aarch64_neon_vqdmull,
3627 SQDMULLshh, SQDMULLdss>;
3629 // Scalar Signed Integer Convert To Floating-point
3630 defm SCVTF : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11101, "scvtf">;
3631 defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns<int_aarch64_neon_vcvtf32_s32,
3632 int_aarch64_neon_vcvtf64_s64,
3635 // Scalar Unsigned Integer Convert To Floating-point
3636 defm UCVTF : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11101, "ucvtf">;
3637 defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns<int_aarch64_neon_vcvtf32_u32,
3638 int_aarch64_neon_vcvtf64_u64,
3641 // Scalar Floating-point Reciprocal Estimate
3642 defm FRECPE : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11101, "frecpe">;
3643 defm : Neon_Scalar2SameMisc_SD_size_patterns<int_arm_neon_vrecpe,
3644 FRECPEss, FRECPEdd>;
3646 // Scalar Floating-point Reciprocal Exponent
3647 defm FRECPX : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11111, "frecpx">;
3648 defm : Neon_Scalar2SameMisc_SD_size_patterns<int_aarch64_neon_vrecpx,
3649 FRECPXss, FRECPXdd>;
3651 // Scalar Floating-point Reciprocal Square Root Estimate
3652 defm FRSQRTE: NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11101, "frsqrte">;
3653 defm : Neon_Scalar2SameMisc_SD_size_patterns<int_arm_neon_vrsqrte,
3654 FRSQRTEss, FRSQRTEdd>;
3656 // Scalar Integer Compare
3658 // Scalar Compare Bitwise Equal
3659 def CMEQddd: NeonI_Scalar3Same_D_size<0b1, 0b10001, "cmeq">;
3660 def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vceq, CMEQddd>;
3662 // Scalar Compare Signed Greater Than Or Equal
3663 def CMGEddd: NeonI_Scalar3Same_D_size<0b0, 0b00111, "cmge">;
3664 def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vcge, CMGEddd>;
3666 // Scalar Compare Unsigned Higher Or Same
3667 def CMHSddd: NeonI_Scalar3Same_D_size<0b1, 0b00111, "cmhs">;
3668 def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vchs, CMHSddd>;
3670 // Scalar Compare Unsigned Higher
3671 def CMHIddd: NeonI_Scalar3Same_D_size<0b1, 0b00110, "cmhi">;
3672 def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vchi, CMHIddd>;
3674 // Scalar Compare Signed Greater Than
3675 def CMGTddd: NeonI_Scalar3Same_D_size<0b0, 0b00110, "cmgt">;
3676 def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vcgt, CMGTddd>;
3678 // Scalar Compare Bitwise Test Bits
3679 def CMTSTddd: NeonI_Scalar3Same_D_size<0b0, 0b10001, "cmtst">;
3680 def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vtstd, CMTSTddd>;
3682 // Scalar Compare Bitwise Equal To Zero
3683 def CMEQddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01001, "cmeq">;
3684 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vceq,
3687 // Scalar Compare Signed Greater Than Or Equal To Zero
3688 def CMGEddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b1, 0b01000, "cmge">;
3689 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcge,
3692 // Scalar Compare Signed Greater Than Zero
3693 def CMGTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01000, "cmgt">;
3694 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcgt,
3697 // Scalar Compare Signed Less Than Or Equal To Zero
3698 def CMLEddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b1, 0b01001, "cmle">;
3699 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vclez,
3702 // Scalar Compare Signed Less Than Zero
3703 def CMLTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01010, "cmlt">;
3704 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcltz,
3707 // Scalar Floating-point Compare
3709 // Scalar Floating-point Compare Mask Equal
3710 defm FCMEQ: NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11100, "fcmeq">;
3711 defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vceq,
3712 FCMEQsss, FCMEQddd>;
3714 // Scalar Floating-point Compare Mask Equal To Zero
3715 defm FCMEQZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01101, "fcmeq">;
3716 defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vceq,
3717 FCMEQZssi, FCMEQZddi>;
3719 // Scalar Floating-point Compare Mask Greater Than Or Equal
3720 defm FCMGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11100, "fcmge">;
3721 defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vcge,
3722 FCMGEsss, FCMGEddd>;
3724 // Scalar Floating-point Compare Mask Greater Than Or Equal To Zero
3725 defm FCMGEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01100, "fcmge">;
3726 defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vcge,
3727 FCMGEZssi, FCMGEZddi>;
3729 // Scalar Floating-point Compare Mask Greater Than
3730 defm FCMGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11100, "fcmgt">;
3731 defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vcgt,
3732 FCMGTsss, FCMGTddd>;
3734 // Scalar Floating-point Compare Mask Greater Than Zero
3735 defm FCMGTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01100, "fcmgt">;
3736 defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vcgt,
3737 FCMGTZssi, FCMGTZddi>;
3739 // Scalar Floating-point Compare Mask Less Than Or Equal To Zero
3740 defm FCMLEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01101, "fcmle">;
3741 defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vclez,
3742 FCMLEZssi, FCMLEZddi>;
3744 // Scalar Floating-point Compare Mask Less Than Zero
3745 defm FCMLTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01110, "fcmlt">;
3746 defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vcltz,
3747 FCMLTZssi, FCMLTZddi>;
3749 // Scalar Floating-point Absolute Compare Mask Greater Than Or Equal
3750 defm FACGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11101, "facge">;
3751 defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vcage,
3752 FACGEsss, FACGEddd>;
3754 // Scalar Floating-point Absolute Compare Mask Greater Than
3755 defm FACGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11101, "facgt">;
3756 defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vcagt,
3757 FACGTsss, FACGTddd>;
3759 // Scalar Absolute Value
3760 defm ABS : NeonI_Scalar2SameMisc_D_size<0b0, 0b01011, "abs">;
3761 defm : Neon_Scalar2SameMisc_D_size_patterns<int_aarch64_neon_vabs, ABSdd>;
3763 // Scalar Signed Saturating Absolute Value
3764 defm SQABS : NeonI_Scalar2SameMisc_BHSD_size<0b0, 0b00111, "sqabs">;
3765 defm : Neon_Scalar2SameMisc_BHSD_size_patterns<int_arm_neon_vqabs,
3766 SQABSbb, SQABShh, SQABSss, SQABSdd>;
3769 defm NEG : NeonI_Scalar2SameMisc_D_size<0b1, 0b01011, "neg">;
3770 defm : Neon_Scalar2SameMisc_D_size_patterns<int_aarch64_neon_vneg, NEGdd>;
3772 // Scalar Signed Saturating Negate
3773 defm SQNEG : NeonI_Scalar2SameMisc_BHSD_size<0b1, 0b00111, "sqneg">;
3774 defm : Neon_Scalar2SameMisc_BHSD_size_patterns<int_arm_neon_vqneg,
3775 SQNEGbb, SQNEGhh, SQNEGss, SQNEGdd>;
3777 // Scalar Signed Saturating Accumulate of Unsigned Value
3778 defm SUQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b0, 0b00011, "suqadd">;
3779 defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns<int_aarch64_neon_vuqadd,
3781 SUQADDss, SUQADDdd>;
3783 // Scalar Unsigned Saturating Accumulate of Signed Value
3784 defm USQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b1, 0b00011, "usqadd">;
3785 defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns<int_aarch64_neon_vsqadd,
3787 USQADDss, USQADDdd>;
3789 // Scalar Signed Saturating Extract Unsigned Narrow
3790 defm SQXTUN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10010, "sqxtun">;
3791 defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovnsu,
3795 // Scalar Signed Saturating Extract Narrow
3796 defm SQXTN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b0, 0b10100, "sqxtn">;
3797 defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovns,
3801 // Scalar Unsigned Saturating Extract Narrow
3802 defm UQXTN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10100, "uqxtn">;
3803 defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovnu,
3807 // Scalar Reduce Pairwise
// Scalar pairwise reduction, 64-bit element form.
// Instantiates one record, `_D_2D`: an FPR64 destination computed from a single
// VPR128 source, printed as "<asmop> $Rd, $Rn.2d".  The size field handed to
// NeonI_ScalarPair is the concatenation {size, 0b1}, and `Commutable` is
// forwarded unchanged to isCommutable.
3809 multiclass NeonI_ScalarPair_D_sizes<bit u, bit size, bits<5> opcode,
3810 string asmop, bit Commutable = 0> {
3811 let isCommutable = Commutable in {
3812 def _D_2D : NeonI_ScalarPair<u, {size, 0b1}, opcode,
3813 (outs FPR64:$Rd), (ins VPR128:$Rn),
3814 !strconcat(asmop, " $Rd, $Rn.2d"),
// Scalar pairwise reduction, 32-bit and 64-bit element forms.
// Inherits the `_D_2D` record from NeonI_ScalarPair_D_sizes and adds `_S_2S`:
// an FPR32 destination computed from a VPR64 source, printed as
// "<asmop> $Rd, $Rn.2s", with size field {size, 0b0}.
3820 multiclass NeonI_ScalarPair_SD_sizes<bit u, bit size, bits<5> opcode,
3821 string asmop, bit Commutable = 0>
3822 : NeonI_ScalarPair_D_sizes<u, size, opcode, asmop, Commutable> {
3823 let isCommutable = Commutable in {
3824 def _S_2S : NeonI_ScalarPair<u, {size, 0b0}, opcode,
3825 (outs FPR32:$Rd), (ins VPR64:$Rn),
3826 !strconcat(asmop, " $Rd, $Rn.2s"),
3832 // Scalar Reduce Addition Pairwise (Integer) with
3833 // Pattern to match llvm.arm.* intrinsic
3834 defm ADDPvv : NeonI_ScalarPair_D_sizes<0b0, 0b1, 0b11011, "addp", 0>;
3836 // Pattern to match llvm.aarch64.* intrinsic for
3837 // Scalar Reduce Addition Pairwise (Integer)
3838 def : Pat<(v1i64 (int_aarch64_neon_vpadd (v2i64 VPR128:$Rn))),
3839 (ADDPvv_D_2D VPR128:$Rn)>;
3841 // Scalar Reduce Addition Pairwise (Floating Point)
3842 defm FADDPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01101, "faddp", 0>;
3844 // Scalar Reduce Maximum Pairwise (Floating Point)
3845 defm FMAXPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01111, "fmaxp", 0>;
3847 // Scalar Reduce Minimum Pairwise (Floating Point)
3848 defm FMINPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01111, "fminp", 0>;
3850 // Scalar Reduce maxNum Pairwise (Floating Point)
3851 defm FMAXNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01100, "fmaxnmp", 0>;
3853 // Scalar Reduce minNum Pairwise (Floating Point)
3854 defm FMINNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01100, "fminnmp", 0>;
// Selection patterns for the scalar pairwise floating-point reductions.
//   opnodeS : node applied to a v2f32 held in VPR64, producing v1f32.
//   opnodeD : node applied to a v2f64 held in VPR128, producing v1f64 and
//             selecting INSTD.
// NOTE(review): users of this multiclass pass a second instruction for the
// 32-bit form; presumably it is the result of the v1f32 pattern -- confirm
// against the full parameter list.
3856 multiclass Neon_ScalarPair_SD_size_patterns<SDPatternOperator opnodeS,
3857 SDPatternOperator opnodeD,
3859 Instruction INSTD> {
3860 def : Pat<(v1f32 (opnodeS (v2f32 VPR64:$Rn))),
3862 def : Pat<(v1f64 (opnodeD (v2f64 VPR128:$Rn))),
3863 (INSTD VPR128:$Rn)>;
3866 // Patterns to match llvm.aarch64.* intrinsic for
3867 // Scalar Reduce Add, Max, Min, MaxNum, MinNum Pairwise (Floating Point)
3868 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfadd,
3869 int_aarch64_neon_vpfaddq, FADDPvv_S_2S, FADDPvv_D_2D>;
3871 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmax,
3872 int_aarch64_neon_vpmaxq, FMAXPvv_S_2S, FMAXPvv_D_2D>;
3874 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmin,
3875 int_aarch64_neon_vpminq, FMINPvv_S_2S, FMINPvv_D_2D>;
3877 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfmaxnm,
3878 int_aarch64_neon_vpfmaxnmq, FMAXNMPvv_S_2S, FMAXNMPvv_D_2D>;
3880 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfminnm,
3881 int_aarch64_neon_vpfminnmq, FMINNMPvv_S_2S, FMINNMPvv_D_2D>;
3885 //===----------------------------------------------------------------------===//
3886 // Non-Instruction Patterns
3887 //===----------------------------------------------------------------------===//
3889 // 64-bit vector bitcasts...
3891 def : Pat<(v1i64 (bitconvert (v8i8 VPR64:$src))), (v1i64 VPR64:$src)>;
3892 def : Pat<(v2f32 (bitconvert (v8i8 VPR64:$src))), (v2f32 VPR64:$src)>;
3893 def : Pat<(v2i32 (bitconvert (v8i8 VPR64:$src))), (v2i32 VPR64:$src)>;
3894 def : Pat<(v4i16 (bitconvert (v8i8 VPR64:$src))), (v4i16 VPR64:$src)>;
3896 def : Pat<(v1i64 (bitconvert (v4i16 VPR64:$src))), (v1i64 VPR64:$src)>;
3897 def : Pat<(v2i32 (bitconvert (v4i16 VPR64:$src))), (v2i32 VPR64:$src)>;
3898 def : Pat<(v2f32 (bitconvert (v4i16 VPR64:$src))), (v2f32 VPR64:$src)>;
3899 def : Pat<(v8i8 (bitconvert (v4i16 VPR64:$src))), (v8i8 VPR64:$src)>;
3901 def : Pat<(v1i64 (bitconvert (v2i32 VPR64:$src))), (v1i64 VPR64:$src)>;
3902 def : Pat<(v2f32 (bitconvert (v2i32 VPR64:$src))), (v2f32 VPR64:$src)>;
3903 def : Pat<(v4i16 (bitconvert (v2i32 VPR64:$src))), (v4i16 VPR64:$src)>;
3904 def : Pat<(v8i8 (bitconvert (v2i32 VPR64:$src))), (v8i8 VPR64:$src)>;
3906 def : Pat<(v1i64 (bitconvert (v2f32 VPR64:$src))), (v1i64 VPR64:$src)>;
3907 def : Pat<(v2i32 (bitconvert (v2f32 VPR64:$src))), (v2i32 VPR64:$src)>;
3908 def : Pat<(v4i16 (bitconvert (v2f32 VPR64:$src))), (v4i16 VPR64:$src)>;
3909 def : Pat<(v8i8 (bitconvert (v2f32 VPR64:$src))), (v8i8 VPR64:$src)>;
3911 def : Pat<(v2f32 (bitconvert (v1i64 VPR64:$src))), (v2f32 VPR64:$src)>;
3912 def : Pat<(v2i32 (bitconvert (v1i64 VPR64:$src))), (v2i32 VPR64:$src)>;
3913 def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>;
3914 def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>;
3916 // ..and 128-bit vector bitcasts...
3918 def : Pat<(v2f64 (bitconvert (v16i8 VPR128:$src))), (v2f64 VPR128:$src)>;
3919 def : Pat<(v2i64 (bitconvert (v16i8 VPR128:$src))), (v2i64 VPR128:$src)>;
3920 def : Pat<(v4f32 (bitconvert (v16i8 VPR128:$src))), (v4f32 VPR128:$src)>;
3921 def : Pat<(v4i32 (bitconvert (v16i8 VPR128:$src))), (v4i32 VPR128:$src)>;
3922 def : Pat<(v8i16 (bitconvert (v16i8 VPR128:$src))), (v8i16 VPR128:$src)>;
3924 def : Pat<(v2f64 (bitconvert (v8i16 VPR128:$src))), (v2f64 VPR128:$src)>;
3925 def : Pat<(v2i64 (bitconvert (v8i16 VPR128:$src))), (v2i64 VPR128:$src)>;
3926 def : Pat<(v4i32 (bitconvert (v8i16 VPR128:$src))), (v4i32 VPR128:$src)>;
3927 def : Pat<(v4f32 (bitconvert (v8i16 VPR128:$src))), (v4f32 VPR128:$src)>;
3928 def : Pat<(v16i8 (bitconvert (v8i16 VPR128:$src))), (v16i8 VPR128:$src)>;
3930 def : Pat<(v2f64 (bitconvert (v4i32 VPR128:$src))), (v2f64 VPR128:$src)>;
3931 def : Pat<(v2i64 (bitconvert (v4i32 VPR128:$src))), (v2i64 VPR128:$src)>;
3932 def : Pat<(v4f32 (bitconvert (v4i32 VPR128:$src))), (v4f32 VPR128:$src)>;
3933 def : Pat<(v8i16 (bitconvert (v4i32 VPR128:$src))), (v8i16 VPR128:$src)>;
3934 def : Pat<(v16i8 (bitconvert (v4i32 VPR128:$src))), (v16i8 VPR128:$src)>;
3936 def : Pat<(v2f64 (bitconvert (v4f32 VPR128:$src))), (v2f64 VPR128:$src)>;
3937 def : Pat<(v2i64 (bitconvert (v4f32 VPR128:$src))), (v2i64 VPR128:$src)>;
3938 def : Pat<(v4i32 (bitconvert (v4f32 VPR128:$src))), (v4i32 VPR128:$src)>;
3939 def : Pat<(v8i16 (bitconvert (v4f32 VPR128:$src))), (v8i16 VPR128:$src)>;
3940 def : Pat<(v16i8 (bitconvert (v4f32 VPR128:$src))), (v16i8 VPR128:$src)>;
3942 def : Pat<(v2f64 (bitconvert (v2i64 VPR128:$src))), (v2f64 VPR128:$src)>;
3943 def : Pat<(v4f32 (bitconvert (v2i64 VPR128:$src))), (v4f32 VPR128:$src)>;
3944 def : Pat<(v4i32 (bitconvert (v2i64 VPR128:$src))), (v4i32 VPR128:$src)>;
3945 def : Pat<(v8i16 (bitconvert (v2i64 VPR128:$src))), (v8i16 VPR128:$src)>;
3946 def : Pat<(v16i8 (bitconvert (v2i64 VPR128:$src))), (v16i8 VPR128:$src)>;
3948 def : Pat<(v2i64 (bitconvert (v2f64 VPR128:$src))), (v2i64 VPR128:$src)>;
3949 def : Pat<(v4f32 (bitconvert (v2f64 VPR128:$src))), (v4f32 VPR128:$src)>;
3950 def : Pat<(v4i32 (bitconvert (v2f64 VPR128:$src))), (v4i32 VPR128:$src)>;
3951 def : Pat<(v8i16 (bitconvert (v2f64 VPR128:$src))), (v8i16 VPR128:$src)>;
3952 def : Pat<(v16i8 (bitconvert (v2f64 VPR128:$src))), (v16i8 VPR128:$src)>;
3955 // ...and scalar bitcasts...
3956 def : Pat<(f16 (bitconvert (v1i16 FPR16:$src))), (f16 FPR16:$src)>;
3957 def : Pat<(f32 (bitconvert (v1i32 FPR32:$src))), (f32 FPR32:$src)>;
3958 def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>;
3959 def : Pat<(f32 (bitconvert (v1f32 FPR32:$src))), (f32 FPR32:$src)>;
3960 def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>;
3962 def : Pat<(i64 (bitconvert (v1i64 FPR64:$src))), (FMOVxd $src)>;
3963 def : Pat<(i32 (bitconvert (v1i32 FPR32:$src))), (FMOVws $src)>;
// NOTE(review): these three v1i64 -> v8i8 / v4i16 / v2i32 bitcasts on VPR64
// have the same source and result types as patterns already present in the
// 64-bit vector bitcast table earlier in this section; presumably redundant --
// confirm before removing.
3965 def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>;
3966 def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>;
3967 def : Pat<(v2i32 (bitconvert (v1i64 VPR64:$src))), (v2i32 VPR64:$src)>;
3969 def : Pat<(f64 (bitconvert (v8i8 VPR64:$src))), (f64 VPR64:$src)>;
3970 def : Pat<(f64 (bitconvert (v4i16 VPR64:$src))), (f64 VPR64:$src)>;
3971 def : Pat<(f64 (bitconvert (v2i32 VPR64:$src))), (f64 VPR64:$src)>;
3972 def : Pat<(f64 (bitconvert (v2f32 VPR64:$src))), (f64 VPR64:$src)>;
3973 def : Pat<(f64 (bitconvert (v1i64 VPR64:$src))), (f64 VPR64:$src)>;
3975 def : Pat<(f128 (bitconvert (v16i8 VPR128:$src))), (f128 VPR128:$src)>;
3976 def : Pat<(f128 (bitconvert (v8i16 VPR128:$src))), (f128 VPR128:$src)>;
3977 def : Pat<(f128 (bitconvert (v4i32 VPR128:$src))), (f128 VPR128:$src)>;
3978 def : Pat<(f128 (bitconvert (v2i64 VPR128:$src))), (f128 VPR128:$src)>;
3979 def : Pat<(f128 (bitconvert (v4f32 VPR128:$src))), (f128 VPR128:$src)>;
3980 def : Pat<(f128 (bitconvert (v2f64 VPR128:$src))), (f128 VPR128:$src)>;
3982 def : Pat<(v1i16 (bitconvert (f16 FPR16:$src))), (v1i16 FPR16:$src)>;
3983 def : Pat<(v1i32 (bitconvert (f32 FPR32:$src))), (v1i32 FPR32:$src)>;
3984 def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
3985 def : Pat<(v1f32 (bitconvert (f32 FPR32:$src))), (v1f32 FPR32:$src)>;
3986 def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>;
3988 def : Pat<(v1i64 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
3989 def : Pat<(v1i32 (bitconvert (i32 GPR32:$src))), (FMOVsw $src)>;
// Bitcasts from a scalar f64 in FPR64 to each 64-bit vector type; the value is
// reused in place with only its type changed.
// NOTE(review): the final v1i64 <- f64 pattern has the same source and result
// types as one already listed with the single-element vector patterns above;
// presumably redundant -- confirm before removing.
3991 def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
3992 def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
3993 def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
3994 def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
3995 def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
3997 def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>;
3998 def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), (v8i16 FPR128:$src)>;
3999 def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>;
4000 def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>;
4001 def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>;
4002 def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>;
// i64 immediate operand that matches only the value 0.  It is parsed through
// neon_uimm0_asmoperand and printed with printNeonUImm8OperandBare.
4004 def neon_uimm0_bare : Operand<i64>,
4005 ImmLeaf<i64, [{return Imm == 0;}]> {
4006 let ParserMatchClass = neon_uimm0_asmoperand;
4007 let PrintMethod = "printNeonUImm8OperandBare";
// i64 lane-index operand for a one-bit index field (values 0..1).  It is parsed
// through neon_uimm1_asmoperand and printed with printNeonUImm8OperandBare.
// The selection predicate checks the range, in the same way neon_uimm0_bare
// checks for 0, so a pattern using this operand cannot match an index that
// does not fit the single Index bit placed into the encoding.
4010 def neon_uimm1_bare : Operand<i64>,
4011 ImmLeaf<i64, [{return Imm >= 0 && Imm < 2;}]> {
4012 let ParserMatchClass = neon_uimm1_asmoperand;
4013 let PrintMethod = "printNeonUImm8OperandBare";
// i64 lane-index operand for a two-bit index field (values 0..3).  It is parsed
// through neon_uimm2_asmoperand and printed with printNeonUImm8OperandBare.
// The selection predicate checks the range, in the same way neon_uimm0_bare
// checks for 0, so a pattern using this operand cannot match an index that
// does not fit the two Index bits placed into the encoding.
4016 def neon_uimm2_bare : Operand<i64>,
4017 ImmLeaf<i64, [{return Imm >= 0 && Imm < 4;}]> {
4018 let ParserMatchClass = neon_uimm2_asmoperand;
4019 let PrintMethod = "printNeonUImm8OperandBare";
// i64 lane-index operand for a three-bit index field (values 0..7).  It is
// parsed through uimm3_asmoperand and printed with printNeonUImm8OperandBare.
// The selection predicate checks the range, in the same way neon_uimm0_bare
// checks for 0, so a pattern using this operand cannot match an index that
// does not fit the three Index bits placed into the encoding.
4022 def neon_uimm3_bare : Operand<i64>,
4023 ImmLeaf<i64, [{return Imm >= 0 && Imm < 8;}]> {
4024 let ParserMatchClass = uimm3_asmoperand;
4025 let PrintMethod = "printNeonUImm8OperandBare";
// i64 immediate operand for a four-bit unsigned field (values 0..15).  It is
// parsed through uimm4_asmoperand and printed with printNeonUImm8OperandBare.
// The selection predicate checks the range, in the same way neon_uimm0_bare
// checks for 0, instead of accepting every immediate.
4028 def neon_uimm4_bare : Operand<i64>,
4029 ImmLeaf<i64, [{return Imm >= 0 && Imm < 16;}]> {
4030 let ParserMatchClass = uimm4_asmoperand;
4031 let PrintMethod = "printNeonUImm8OperandBare";
// Insert a register value into one lane of a 128-bit vector.
//   asmop / Res : mnemonic and the element-arrangement suffix printed after $Rd.
//   ResTy       : vector type of both the incoming vector and the result.
//   OpGPR       : register class of the value being inserted ($Rn).
//   OpImm       : operand type of the lane number ($Imm).
// The incoming vector $src is tied to $Rd, so the instruction rewrites its
// destination in place; the selection pattern is built on vector_insert.
// Assembly form: "<asmop>\t$Rd.<Res>[$Imm], $Rn".
4034 class NeonI_INS_main<string asmop, string Res, ValueType ResTy,
4035 RegisterClass OpGPR, ValueType OpTy, Operand OpImm>
4036 : NeonI_copy<0b1, 0b0, 0b0011,
4037 (outs VPR128:$Rd), (ins VPR128:$src, OpGPR:$Rn, OpImm:$Imm),
4038 asmop # "\t$Rd." # Res # "[$Imm], $Rn",
4039 [(set (ResTy VPR128:$Rd),
4040 (ResTy (vector_insert
4041 (ResTy VPR128:$src),
4046 let Constraints = "$src = $Rd";
4049 // The following definitions are for the instruction class (3V Elem)
// Base class for by-element instructions that also read their destination.
//   ResS / OpS / EleOpS : arrangement suffixes printed after $Rd, $Rn and $Re.
//   OpImm               : operand type of the lane number ($Index).
//   ResVPR / OpVPR / EleOpVPR : register operands for $Rd/$src, $Rn and $Re.
// Inputs are $src (tied to $Rd), $Rn, the element vector $Re and $Index.
// Assembly form: "<asmop>\t$Rd.<ResS>, $Rn.<OpS>, $Re.<EleOpS>[$Index]".
4053 class NI_2VE<bit q, bit u, bits<2> size, bits<4> opcode,
4054 string asmop, string ResS, string OpS, string EleOpS,
4055 Operand OpImm, RegisterOperand ResVPR,
4056 RegisterOperand OpVPR, RegisterOperand EleOpVPR>
4057 : NeonI_2VElem<q, u, size, opcode,
4058 (outs ResVPR:$Rd), (ins ResVPR:$src, OpVPR:$Rn,
4059 EleOpVPR:$Re, OpImm:$Index),
4060 asmop # "\t$Rd." # ResS # ", $Rn." # OpS #
4061 ", $Re." # EleOpS # "[$Index]",
4067 let Constraints = "$src = $Rd";
// By-element instructions (destination also read) for 32-bit (S) and 16-bit
// (H) elements.  Four records are produced: _2s4s, _4s4s, _4h8h and _8h8h.
// Encoding of the lane number and element register:
//   S forms: Index{1} -> Inst{11}, Index{0} -> Inst{21}, Re -> Inst{20-16}.
//   H forms: Index{2} -> Inst{11}, Index{1} -> Inst{21}, Index{0} -> Inst{20},
//            Re{3-0} -> Inst{19-16}.
4070 multiclass NI_2VE_v1<bit u, bits<4> opcode, string asmop>
4072 // vector register class for element is always 128-bit to cover the max index
4073 def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
4074 neon_uimm2_bare, VPR64, VPR64, VPR128> {
4075 let Inst{11} = {Index{1}};
4076 let Inst{21} = {Index{0}};
4077 let Inst{20-16} = Re;
4080 def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
4081 neon_uimm2_bare, VPR128, VPR128, VPR128> {
4082 let Inst{11} = {Index{1}};
4083 let Inst{21} = {Index{0}};
4084 let Inst{20-16} = Re;
4087 // Index operations on 16-bit(H) elements are restricted to using v0-v15.
4088 def _4h8h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h",
4089 neon_uimm3_bare, VPR64, VPR64, VPR128Lo> {
4090 let Inst{11} = {Index{2}};
4091 let Inst{21} = {Index{1}};
4092 let Inst{20} = {Index{0}};
4093 let Inst{19-16} = Re{3-0};
4096 def _8h8h : NI_2VE<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h",
4097 neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
4098 let Inst{11} = {Index{2}};
4099 let Inst{21} = {Index{1}};
4100 let Inst{20} = {Index{0}};
4101 let Inst{19-16} = Re{3-0};
4105 defm MLAvve : NI_2VE_v1<0b1, 0b0000, "mla">;
4106 defm MLSvve : NI_2VE_v1<0b1, 0b0100, "mls">;
4108 // Pattern for lane in 128-bit vector
// Matches `op` applied to the accumulator $src, the vector $Rn and a value
// built by `coreop` from the element vector $Re and an i64 lane number, and
// selects INST with the operands passed through unchanged.
4109 class NI_2VE_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
4110 RegisterOperand ResVPR, RegisterOperand OpVPR,
4111 RegisterOperand EleOpVPR, ValueType ResTy, ValueType OpTy,
4112 ValueType EleOpTy, SDPatternOperator coreop>
4113 : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn),
4114 (OpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
4115 (INST ResVPR:$src, OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
4117 // Pattern for lane in 64-bit vector
// Same DAG shape as NI_2VE_laneq, but the element vector $Re is wrapped in
// SUBREG_TO_REG (..., sub_64) before being handed to INST as its element
// operand.
4118 class NI_2VE_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
4119 RegisterOperand ResVPR, RegisterOperand OpVPR,
4120 RegisterOperand EleOpVPR, ValueType ResTy, ValueType OpTy,
4121 ValueType EleOpTy, SDPatternOperator coreop>
4122 : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn),
4123 (OpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
4124 (INST ResVPR:$src, OpVPR:$Rn,
4125 (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
// Selection patterns that bind `op` to the four instructions named
// <subop>_2s4s, <subop>_4s4s, <subop>_4h8h and <subop>_8h8h.
// The first four patterns take the lane from a 128-bit vector (NI_2VE_laneq),
// the last four from a 64-bit vector (NI_2VE_lane).  Where the lane source and
// the other operand differ in width, the Neon_vduplane is combined with
// Neon_low4S / Neon_low8H or Neon_combine_4S / Neon_combine_8H.
4127 multiclass NI_2VE_v1_pat<string subop, SDPatternOperator op>
4129 def : NI_2VE_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
4130 op, VPR64, VPR64, VPR128, v2i32, v2i32, v4i32,
4131 BinOpFrag<(Neon_vduplane
4132 (Neon_low4S node:$LHS), node:$RHS)>>;
4134 def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
4135 op, VPR128, VPR128, VPR128, v4i32, v4i32, v4i32,
4136 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4138 def : NI_2VE_laneq<!cast<Instruction>(subop # "_4h8h"), neon_uimm3_bare,
4139 op, VPR64, VPR64, VPR128Lo, v4i16, v4i16, v8i16,
4140 BinOpFrag<(Neon_vduplane
4141 (Neon_low8H node:$LHS), node:$RHS)>>;
4143 def : NI_2VE_laneq<!cast<Instruction>(subop # "_8h8h"), neon_uimm3_bare,
4144 op, VPR128, VPR128, VPR128Lo, v8i16, v8i16, v8i16,
4145 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4147 // A lane taken from a 64-bit vector uses an index operand one bit narrower than the 128-bit case.
4149 def : NI_2VE_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
4150 op, VPR64, VPR64, VPR64, v2i32, v2i32, v2i32,
4151 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4153 def : NI_2VE_lane<!cast<Instruction>(subop # "_4s4s"), neon_uimm1_bare,
4154 op, VPR128, VPR128, VPR64, v4i32, v4i32, v2i32,
4155 BinOpFrag<(Neon_vduplane
4156 (Neon_combine_4S node:$LHS, undef),
4159 def : NI_2VE_lane<!cast<Instruction>(subop # "_4h8h"), neon_uimm2_bare,
4160 op, VPR64, VPR64, VPR64Lo, v4i16, v4i16, v4i16,
4161 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4163 def : NI_2VE_lane<!cast<Instruction>(subop # "_8h8h"), neon_uimm2_bare,
4164 op, VPR128, VPR128, VPR64Lo, v8i16, v8i16, v4i16,
4165 BinOpFrag<(Neon_vduplane
4166 (Neon_combine_8H node:$LHS, undef),
4170 defm MLA_lane_v1 : NI_2VE_v1_pat<"MLAvve", Neon_mla>;
4171 defm MLS_lane_v1 : NI_2VE_v1_pat<"MLSvve", Neon_mls>;
// Base class for by-element instructions with two register inputs.
// Unlike NI_2VE there is no $src input: the operands are $Rn, the element
// vector $Re and the lane number $Index.
// Assembly form: "<asmop>\t$Rd.<ResS>, $Rn.<OpS>, $Re.<EleOpS>[$Index]".
4173 class NI_2VE_2op<bit q, bit u, bits<2> size, bits<4> opcode,
4174 string asmop, string ResS, string OpS, string EleOpS,
4175 Operand OpImm, RegisterOperand ResVPR,
4176 RegisterOperand OpVPR, RegisterOperand EleOpVPR>
4177 : NeonI_2VElem<q, u, size, opcode,
4178 (outs ResVPR:$Rd), (ins OpVPR:$Rn,
4179 EleOpVPR:$Re, OpImm:$Index),
4180 asmop # "\t$Rd." # ResS # ", $Rn." # OpS #
4181 ", $Re." # EleOpS # "[$Index]",
// Two-input by-element instructions for 32-bit (S) and 16-bit (H) elements.
// Produces _2s4s, _4s4s, _4h8h and _8h8h records built on NI_2VE_2op.
// Encoding of the lane number and element register:
//   S forms: Index{1} -> Inst{11}, Index{0} -> Inst{21}, Re -> Inst{20-16}.
//   H forms: Index{2} -> Inst{11}, Index{1} -> Inst{21}, Index{0} -> Inst{20},
//            Re{3-0} -> Inst{19-16}.
4188 multiclass NI_2VE_v1_2op<bit u, bits<4> opcode, string asmop>
4190 // vector register class for element is always 128-bit to cover the max index
4191 def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
4192 neon_uimm2_bare, VPR64, VPR64, VPR128> {
4193 let Inst{11} = {Index{1}};
4194 let Inst{21} = {Index{0}};
4195 let Inst{20-16} = Re;
4198 def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
4199 neon_uimm2_bare, VPR128, VPR128, VPR128> {
4200 let Inst{11} = {Index{1}};
4201 let Inst{21} = {Index{0}};
4202 let Inst{20-16} = Re;
4205 // Index operations on 16-bit(H) elements are restricted to using v0-v15.
4206 def _4h8h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h",
4207 neon_uimm3_bare, VPR64, VPR64, VPR128Lo> {
4208 let Inst{11} = {Index{2}};
4209 let Inst{21} = {Index{1}};
4210 let Inst{20} = {Index{0}};
4211 let Inst{19-16} = Re{3-0};
4214 def _8h8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h",
4215 neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
4216 let Inst{11} = {Index{2}};
4217 let Inst{21} = {Index{1}};
4218 let Inst{20} = {Index{0}};
4219 let Inst{19-16} = Re{3-0};
4223 defm MULve : NI_2VE_v1_2op<0b0, 0b1000, "mul">;
4224 defm SQDMULHve : NI_2VE_v1_2op<0b0, 0b1100, "sqdmulh">;
4225 defm SQRDMULHve : NI_2VE_v1_2op<0b0, 0b1101, "sqrdmulh">;
4227 // Pattern for lane in 128-bit vector
// Matches the two-operand node `op` applied to $Rn and a value built by
// `coreop` from the element vector $Re and an i64 lane number, and selects
// INST with the operands passed through unchanged.
4228 class NI_2VE_mul_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
4229 RegisterOperand OpVPR, RegisterOperand EleOpVPR,
4230 ValueType ResTy, ValueType OpTy, ValueType EleOpTy,
4231 SDPatternOperator coreop>
4232 : Pat<(ResTy (op (OpTy OpVPR:$Rn),
4233 (OpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
4234 (INST OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
4236 // Pattern for lane in 64-bit vector
// Same DAG shape as NI_2VE_mul_laneq, but the element vector $Re is wrapped
// in SUBREG_TO_REG (..., sub_64) in the result so it can be used as the
// element operand of the selected instruction.
4237 class NI_2VE_mul_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
4238 RegisterOperand OpVPR, RegisterOperand EleOpVPR,
4239 ValueType ResTy, ValueType OpTy, ValueType EleOpTy,
4240 SDPatternOperator coreop>
4241 : Pat<(ResTy (op (OpTy OpVPR:$Rn),
4242 (OpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
4244 (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
// Selection patterns that bind the two-operand node `op` to the instructions
// named <subop>_2s4s, <subop>_4s4s, <subop>_4h8h and <subop>_8h8h.
// The first four patterns take the lane from a 128-bit vector
// (NI_2VE_mul_laneq), the last four from a 64-bit vector (NI_2VE_mul_lane).
// Where the lane source and the other operand differ in width, the
// Neon_vduplane is combined with Neon_low4S / Neon_low8H or
// Neon_combine_4S / Neon_combine_8H.
4246 multiclass NI_2VE_mul_v1_pat<string subop, SDPatternOperator op>
4248 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
4249 op, VPR64, VPR128, v2i32, v2i32, v4i32,
4250 BinOpFrag<(Neon_vduplane
4251 (Neon_low4S node:$LHS), node:$RHS)>>;
4253 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
4254 op, VPR128, VPR128, v4i32, v4i32, v4i32,
4255 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4257 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4h8h"), neon_uimm3_bare,
4258 op, VPR64, VPR128Lo, v4i16, v4i16, v8i16,
4259 BinOpFrag<(Neon_vduplane
4260 (Neon_low8H node:$LHS), node:$RHS)>>;
4262 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_8h8h"), neon_uimm3_bare,
4263 op, VPR128, VPR128Lo, v8i16, v8i16, v8i16,
4264 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4266 // A lane taken from a 64-bit vector uses an index operand one bit narrower than the 128-bit case.
4268 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
4269 op, VPR64, VPR64, v2i32, v2i32, v2i32,
4270 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4272 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4s4s"), neon_uimm1_bare,
4273 op, VPR128, VPR64, v4i32, v4i32, v2i32,
4274 BinOpFrag<(Neon_vduplane
4275 (Neon_combine_4S node:$LHS, undef),
4278 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4h8h"), neon_uimm2_bare,
4279 op, VPR64, VPR64Lo, v4i16, v4i16, v4i16,
4280 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4282 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_8h8h"), neon_uimm2_bare,
4283 op, VPR128, VPR64Lo, v8i16, v8i16, v4i16,
4284 BinOpFrag<(Neon_vduplane
4285 (Neon_combine_8H node:$LHS, undef),
4289 defm MUL_lane_v1 : NI_2VE_mul_v1_pat<"MULve", mul>;
4290 defm SQDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQDMULHve", int_arm_neon_vqdmulh>;
4291 defm SQRDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQRDMULHve", int_arm_neon_vqrdmulh>;
// Two-input by-element instructions for 32-bit (S) and 64-bit (D) elements.
// Produces _2s4s, _4s4s and _2d2d records built on NI_2VE_2op.
// Encoding of the lane number and element register:
//   S forms: Index{1} -> Inst{11}, Index{0} -> Inst{21}, Re -> Inst{20-16}.
//   D form : Index{0} -> Inst{11}, Re -> Inst{20-16}.
4295 multiclass NI_2VE_v2_2op<bit u, bits<4> opcode, string asmop>
4297 // vector register class for element is always 128-bit to cover the max index
4298 def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
4299 neon_uimm2_bare, VPR64, VPR64, VPR128> {
4300 let Inst{11} = {Index{1}};
4301 let Inst{21} = {Index{0}};
4302 let Inst{20-16} = Re;
4305 def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
4306 neon_uimm2_bare, VPR128, VPR128, VPR128> {
4307 let Inst{11} = {Index{1}};
4308 let Inst{21} = {Index{0}};
4309 let Inst{20-16} = Re;
4312 // _1d2d doesn't exist!
4314 def _2d2d : NI_2VE_2op<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d",
4315 neon_uimm1_bare, VPR128, VPR128, VPR128> {
4316 let Inst{11} = {Index{0}};
4318 let Inst{20-16} = Re;
4322 defm FMULve : NI_2VE_v2_2op<0b0, 0b1001, "fmul">;
4323 defm FMULXve : NI_2VE_v2_2op<0b1, 0b1001, "fmulx">;
// Pattern for the 2d form when the element operand is a one-element 64-bit
// vector.  `coreop` is matched with the same $Re value as both of its
// operands, $Re is wrapped in SUBREG_TO_REG (..., sub_64) in the result, and
// the lane number given to the instruction is the constant 0.  OpImm is not
// referenced by the lines of the pattern shown here.
4325 class NI_2VE_mul_lane_2d<Instruction INST, Operand OpImm, SDPatternOperator op,
4326 RegisterOperand OpVPR, RegisterOperand EleOpVPR,
4327 ValueType ResTy, ValueType OpTy, ValueType EleOpTy,
4328 SDPatternOperator coreop>
4329 : Pat<(ResTy (op (OpTy OpVPR:$Rn),
4330 (OpTy (coreop (EleOpTy EleOpVPR:$Re), (EleOpTy EleOpVPR:$Re))))),
4332 (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), 0)>;
// Selection patterns that bind the two-operand floating-point node `op` to
// the instructions named <subop>_2s4s, <subop>_4s4s and <subop>_2d2d.
// The first three patterns take the lane from a 128-bit vector
// (NI_2VE_mul_laneq); the next two take it from a 64-bit v2f32 vector
// (NI_2VE_mul_lane); the last handles a v1f64 element through
// NI_2VE_mul_lane_2d, using Neon_combine_2d instead of Neon_vduplane.
4334 multiclass NI_2VE_mul_v2_pat<string subop, SDPatternOperator op>
4336 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
4337 op, VPR64, VPR128, v2f32, v2f32, v4f32,
4338 BinOpFrag<(Neon_vduplane
4339 (Neon_low4f node:$LHS), node:$RHS)>>;
4341 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
4342 op, VPR128, VPR128, v4f32, v4f32, v4f32,
4343 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4345 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2d2d"), neon_uimm1_bare,
4346 op, VPR128, VPR128, v2f64, v2f64, v2f64,
4347 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4349 // A lane taken from a 64-bit vector uses an index operand one bit narrower than the 128-bit case.
4351 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
4352 op, VPR64, VPR64, v2f32, v2f32, v2f32,
4353 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4355 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4s4s"), neon_uimm1_bare,
4356 op, VPR128, VPR64, v4f32, v4f32, v2f32,
4357 BinOpFrag<(Neon_vduplane
4358 (Neon_combine_4f node:$LHS, undef),
4361 def : NI_2VE_mul_lane_2d<!cast<Instruction>(subop # "_2d2d"), neon_uimm1_bare,
4362 op, VPR128, VPR64, v2f64, v2f64, v1f64,
4363 BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>;
4366 defm FMUL_lane_v2 : NI_2VE_mul_v2_pat<"FMULve", fmul>;
4367 defm FMULX_lane_v2 : NI_2VE_mul_v2_pat<"FMULXve", int_aarch64_neon_vmulx>;
// The following patterns use fma.
// -ffp-contract=fast generates fma.
4372 multiclass NI_2VE_v2<bit u, bits<4> opcode, string asmop>
4374 // vector register class for element is always 128-bit to cover the max index
4375 def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
4376 neon_uimm2_bare, VPR64, VPR64, VPR128> {
4377 let Inst{11} = {Index{1}};
4378 let Inst{21} = {Index{0}};
4379 let Inst{20-16} = Re;
4382 def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
4383 neon_uimm2_bare, VPR128, VPR128, VPR128> {
4384 let Inst{11} = {Index{1}};
4385 let Inst{21} = {Index{0}};
4386 let Inst{20-16} = Re;
4389 // _1d2d doesn't exist!
4391 def _2d2d : NI_2VE<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d",
4392 neon_uimm1_bare, VPR128, VPR128, VPR128> {
4393 let Inst{11} = {Index{0}};
4395 let Inst{20-16} = Re;
4399 defm FMLAvve : NI_2VE_v2<0b0, 0b0001, "fmla">;
4400 defm FMLSvve : NI_2VE_v2<0b0, 0b0101, "fmls">;
// Pattern for lane in 128-bit vector.
//
//   source: (op (coreop $Re, $Index), $Rn, $src)
//   result: (INST $src, $Rn, $Re, $Index)
//
// The splatted lane is operand 0 of `op`, $Rn is operand 1 and $src is
// operand 2.  This is the same operand order used by NI_2VEswap_lane and
// NI_2VEswap_lane_2d2d.  The users in this file instantiate `op` with fma
// (op0 * op1 + op2), so $src must sit in the addend position: listing $src
// before $Rn would match "lane * $src + $Rn" while still handing $src to INST
// as its first input operand.
// NOTE(review): $src is expected to be the accumulator tied to $Rd in the
// NI_2VE instruction class -- confirm against that definition.
class NI_2VEswap_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
                       RegisterOperand ResVPR, RegisterOperand OpVPR,
                       ValueType ResTy, ValueType OpTy,
                       SDPatternOperator coreop>
  : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))),
                   (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))),
        (INST ResVPR:$src, ResVPR:$Rn, OpVPR:$Re, OpImm:$Index)>;
// Pattern for lane in 64-bit vector.
//
//   source: (op (coreop $Re, $Index), $Rn, $src)
//   result: (INST $src, $Rn, (SUBREG_TO_REG 0, $Re, sub_64), $Index)
//
// The lane-splat produced by `coreop` is operand 0 of `op`; $Rn and $src
// follow.  $Re is wrapped in SUBREG_TO_REG with sub_64 before it is handed to
// INST as the element register operand.
class NI_2VEswap_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
                      RegisterOperand ResVPR, RegisterOperand OpVPR,
                      ValueType ResTy, ValueType OpTy,
                      SDPatternOperator coreop>
  : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))),
                   (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))),
        (INST ResVPR:$src, ResVPR:$Rn,
          (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), OpImm:$Index)>;
// Pattern for the 2d2d form when the element comes from a 64-bit vector.
//
//   source: (op (coreop $Re, $Re), $Rn, $src)
//   result: (INST $src, $Rn, (SUBREG_TO_REG 0, $Re, sub_64), 0)
//
// Unlike NI_2VEswap_lane there is no index operand in the source DAG:
// `coreop` receives the same $Re value for both of its operands, and the
// selected instruction always uses lane index 0.  OpImm is accepted for
// signature parity with the other swap patterns but is not referenced here.
class NI_2VEswap_lane_2d2d<Instruction INST, Operand OpImm,
                           SDPatternOperator op,
                           RegisterOperand ResVPR, RegisterOperand OpVPR,
                           ValueType ResTy, ValueType OpTy,
                           SDPatternOperator coreop>
  : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (OpTy OpVPR:$Re))),
                   (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))),
        (INST ResVPR:$src, ResVPR:$Rn,
          (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), 0)>;
4433 multiclass NI_2VE_fma_v2_pat<string subop, SDPatternOperator op>
4435 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
4436 neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
4437 BinOpFrag<(Neon_vduplane
4438 (Neon_low4f node:$LHS), node:$RHS)>>;
4440 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
4441 neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
4442 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4444 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
4445 neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
4446 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
// A 64-bit element vector has half as many lanes as the 128-bit one, so the
// patterns below use a narrower lane-index immediate.
4450 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
4451 neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
4452 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4454 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_4s4s"),
4455 neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32,
4456 BinOpFrag<(Neon_vduplane
4457 (Neon_combine_4f node:$LHS, undef),
4460 def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
4461 neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
4462 BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>;
4465 defm FMLA_lane_v2_s : NI_2VE_fma_v2_pat<"FMLAvve", fma>;
4467 multiclass NI_2VE_fms_v2_pat<string subop, SDPatternOperator op>
4469 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
4470 neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
4471 BinOpFrag<(fneg (Neon_vduplane
4472 (Neon_low4f node:$LHS), node:$RHS))>>;
4474 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
4475 neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
4476 BinOpFrag<(Neon_vduplane
4477 (Neon_low4f (fneg node:$LHS)),
4480 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
4481 neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
4482 BinOpFrag<(fneg (Neon_vduplane
4483 node:$LHS, node:$RHS))>>;
4485 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
4486 neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
4487 BinOpFrag<(Neon_vduplane
4488 (fneg node:$LHS), node:$RHS)>>;
4490 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
4491 neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
4492 BinOpFrag<(fneg (Neon_vduplane
4493 node:$LHS, node:$RHS))>>;
4495 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
4496 neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
4497 BinOpFrag<(Neon_vduplane
4498 (fneg node:$LHS), node:$RHS)>>;
// A 64-bit element vector has half as many lanes as the 128-bit one, so the
// patterns below use a narrower lane-index immediate.
4502 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
4503 neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
4504 BinOpFrag<(fneg (Neon_vduplane
4505 node:$LHS, node:$RHS))>>;
4507 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
4508 neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
4509 BinOpFrag<(Neon_vduplane
4510 (fneg node:$LHS), node:$RHS)>>;
4512 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_4s4s"),
4513 neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32,
4514 BinOpFrag<(fneg (Neon_vduplane
4515 (Neon_combine_4f node:$LHS, undef),
4518 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_4s4s"),
4519 neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32,
4520 BinOpFrag<(Neon_vduplane
4521 (Neon_combine_4f (fneg node:$LHS), undef),
4524 def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
4525 neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
4526 BinOpFrag<(fneg (Neon_combine_2d
4527 node:$LHS, node:$RHS))>>;
4529 def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
4530 neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
4531 BinOpFrag<(Neon_combine_2d
4532 (fneg node:$LHS), (fneg node:$RHS))>>;
// The operator passed here is plain fma: the negation that turns the
// multiply-add into a multiply-subtract is expressed by the fneg nodes inside
// the BinOpFrag lane fragments of NI_2VE_fms_v2_pat.
defm FMLS_lane_v2_s : NI_2VE_fms_v2_pat<"FMLSvve", fma>;
4537 // Variant 3: Long type
4538 // E.g. SMLAL : 4S/4H/H (v0-v15), 2D/2S/S
4539 // SMLAL2: 4S/8H/H (v0-v15), 2D/4S/S
4541 multiclass NI_2VE_v3<bit u, bits<4> opcode, string asmop>
4543 // vector register class for element is always 128-bit to cover the max index
4544 def _2d2s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s",
4545 neon_uimm2_bare, VPR128, VPR64, VPR128> {
4546 let Inst{11} = {Index{1}};
4547 let Inst{21} = {Index{0}};
4548 let Inst{20-16} = Re;
4551 def _2d4s : NI_2VE<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s",
4552 neon_uimm2_bare, VPR128, VPR128, VPR128> {
4553 let Inst{11} = {Index{1}};
4554 let Inst{21} = {Index{0}};
4555 let Inst{20-16} = Re;
4558 // Index operations on 16-bit(H) elements are restricted to using v0-v15.
4559 def _4s8h : NI_2VE<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h",
4560 neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
4561 let Inst{11} = {Index{2}};
4562 let Inst{21} = {Index{1}};
4563 let Inst{20} = {Index{0}};
4564 let Inst{19-16} = Re{3-0};
4567 def _4s4h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h",
4568 neon_uimm3_bare, VPR128, VPR64, VPR128Lo> {
4569 let Inst{11} = {Index{2}};
4570 let Inst{21} = {Index{1}};
4571 let Inst{20} = {Index{0}};
4572 let Inst{19-16} = Re{3-0};
4576 defm SMLALvve : NI_2VE_v3<0b0, 0b0010, "smlal">;
4577 defm UMLALvve : NI_2VE_v3<0b1, 0b0010, "umlal">;
4578 defm SMLSLvve : NI_2VE_v3<0b0, 0b0110, "smlsl">;
4579 defm UMLSLvve : NI_2VE_v3<0b1, 0b0110, "umlsl">;
4580 defm SQDMLALvve : NI_2VE_v3<0b0, 0b0011, "sqdmlal">;
4581 defm SQDMLSLvve : NI_2VE_v3<0b0, 0b0111, "sqdmlsl">;
4583 multiclass NI_2VE_v3_2op<bit u, bits<4> opcode, string asmop>
4585 // vector register class for element is always 128-bit to cover the max index
4586 def _2d2s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s",
4587 neon_uimm2_bare, VPR128, VPR64, VPR128> {
4588 let Inst{11} = {Index{1}};
4589 let Inst{21} = {Index{0}};
4590 let Inst{20-16} = Re;
4593 def _2d4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s",
4594 neon_uimm2_bare, VPR128, VPR128, VPR128> {
4595 let Inst{11} = {Index{1}};
4596 let Inst{21} = {Index{0}};
4597 let Inst{20-16} = Re;
4600 // Index operations on 16-bit(H) elements are restricted to using v0-v15.
4601 def _4s8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h",
4602 neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
4603 let Inst{11} = {Index{2}};
4604 let Inst{21} = {Index{1}};
4605 let Inst{20} = {Index{0}};
4606 let Inst{19-16} = Re{3-0};
4609 def _4s4h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h",
4610 neon_uimm3_bare, VPR128, VPR64, VPR128Lo> {
4611 let Inst{11} = {Index{2}};
4612 let Inst{21} = {Index{1}};
4613 let Inst{20} = {Index{0}};
4614 let Inst{19-16} = Re{3-0};
4618 defm SMULLve : NI_2VE_v3_2op<0b0, 0b1010, "smull">;
4619 defm UMULLve : NI_2VE_v3_2op<0b1, 0b1010, "umull">;
4620 defm SQDMULLve : NI_2VE_v3_2op<0b0, 0b1011, "sqdmull">;
// Pattern for lane in 128-bit vector, used in this file for the _4s8h and
// _2d4s instruction variants.
//
//   source: (op $src, (hiop $Rn), (coreop $Re, $Index))
//   result: (INST $src, $Rn, $Re, $Index)
//
// $src and $Rn are always VPR128.  `hiop` is applied to the full-width $Rn and
// yields a HalfOpTy value (presumably the high half, going by the
// Neon_High8H / Neon_High4S arguments used by the callers -- confirm).
// `coreop` produces the HalfOpTy lane-splat from $Re.
class NI_2VEL2_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
                     RegisterOperand EleOpVPR, ValueType ResTy,
                     ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
                     SDPatternOperator hiop, SDPatternOperator coreop>
  : Pat<(ResTy (op (ResTy VPR128:$src),
          (HalfOpTy (hiop (OpTy VPR128:$Rn))),
          (HalfOpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
        (INST VPR128:$src, VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
// Pattern for lane in 64-bit vector, used in this file for the _4s8h and
// _2d4s instruction variants.
//
//   source: (op $src, (hiop $Rn), (coreop $Re, $Index))
//   result: (INST $src, $Rn, (SUBREG_TO_REG 0, $Re, sub_64), $Index)
//
// Same source shape as NI_2VEL2_laneq, except that $Re is wrapped in
// SUBREG_TO_REG with sub_64 before being passed to INST as the element
// register operand.
class NI_2VEL2_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
                    RegisterOperand EleOpVPR, ValueType ResTy,
                    ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
                    SDPatternOperator hiop, SDPatternOperator coreop>
  : Pat<(ResTy (op (ResTy VPR128:$src),
          (HalfOpTy (hiop (OpTy VPR128:$Rn))),
          (HalfOpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
        (INST VPR128:$src, VPR128:$Rn,
          (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
4643 multiclass NI_2VEL_v3_pat<string subop, SDPatternOperator op>
4645 def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
4646 op, VPR128, VPR64, VPR128Lo, v4i32, v4i16, v8i16,
4647 BinOpFrag<(Neon_vduplane
4648 (Neon_low8H node:$LHS), node:$RHS)>>;
4650 def : NI_2VE_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
4651 op, VPR128, VPR64, VPR128, v2i64, v2i32, v4i32,
4652 BinOpFrag<(Neon_vduplane
4653 (Neon_low4S node:$LHS), node:$RHS)>>;
4655 def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
4656 op, VPR128Lo, v4i32, v8i16, v8i16, v4i16, Neon_High8H,
4657 BinOpFrag<(Neon_vduplane
4658 (Neon_low8H node:$LHS), node:$RHS)>>;
4660 def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
4661 op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S,
4662 BinOpFrag<(Neon_vduplane
4663 (Neon_low4S node:$LHS), node:$RHS)>>;
// A 64-bit element vector has half as many lanes as the 128-bit one, so the
// patterns below use a narrower lane-index immediate.
4667 def : NI_2VE_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
4668 op, VPR128, VPR64, VPR64Lo, v4i32, v4i16, v4i16,
4669 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4671 def : NI_2VE_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
4672 op, VPR128, VPR64, VPR64, v2i64, v2i32, v2i32,
4673 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4675 def : NI_2VEL2_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
4676 op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H,
4677 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4679 def : NI_2VEL2_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
4680 op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S,
4681 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4684 defm SMLAL_lane_v3 : NI_2VEL_v3_pat<"SMLALvve", Neon_smlal>;
4685 defm UMLAL_lane_v3 : NI_2VEL_v3_pat<"UMLALvve", Neon_umlal>;
4686 defm SMLSL_lane_v3 : NI_2VEL_v3_pat<"SMLSLvve", Neon_smlsl>;
4687 defm UMLSL_lane_v3 : NI_2VEL_v3_pat<"UMLSLvve", Neon_umlsl>;
4689 // Pattern for lane in 128-bit vector
4690 class NI_2VEL2_mul_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
4691 RegisterOperand EleOpVPR, ValueType ResTy,
4692 ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
4693 SDPatternOperator hiop, SDPatternOperator coreop>
4695 (HalfOpTy (hiop (OpTy VPR128:$Rn))),
4696 (HalfOpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
4697 (INST VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
4699 // Pattern for lane in 64-bit vector
4700 class NI_2VEL2_mul_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
4701 RegisterOperand EleOpVPR, ValueType ResTy,
4702 ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
4703 SDPatternOperator hiop, SDPatternOperator coreop>
4705 (HalfOpTy (hiop (OpTy VPR128:$Rn))),
4706 (HalfOpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
4708 (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
4710 multiclass NI_2VEL_mul_v3_pat<string subop, SDPatternOperator op>
4712 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
4713 op, VPR64, VPR128Lo, v4i32, v4i16, v8i16,
4714 BinOpFrag<(Neon_vduplane
4715 (Neon_low8H node:$LHS), node:$RHS)>>;
4717 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
4718 op, VPR64, VPR128, v2i64, v2i32, v4i32,
4719 BinOpFrag<(Neon_vduplane
4720 (Neon_low4S node:$LHS), node:$RHS)>>;
4722 def : NI_2VEL2_mul_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
4723 op, VPR128Lo, v4i32, v8i16, v8i16, v4i16,
4725 BinOpFrag<(Neon_vduplane
4726 (Neon_low8H node:$LHS), node:$RHS)>>;
4728 def : NI_2VEL2_mul_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
4729 op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S,
4730 BinOpFrag<(Neon_vduplane
4731 (Neon_low4S node:$LHS), node:$RHS)>>;
// A 64-bit element vector has half as many lanes as the 128-bit one, so the
// patterns below use a narrower lane-index immediate.
4735 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
4736 op, VPR64, VPR64Lo, v4i32, v4i16, v4i16,
4737 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4739 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
4740 op, VPR64, VPR64, v2i64, v2i32, v2i32,
4741 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4743 def : NI_2VEL2_mul_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
4744 op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H,
4745 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4747 def : NI_2VEL2_mul_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
4748 op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S,
4749 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4752 defm SMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SMULLve", int_arm_neon_vmulls>;
4753 defm UMULL_lane_v3 : NI_2VEL_mul_v3_pat<"UMULLve", int_arm_neon_vmullu>;
4754 defm SQDMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SQDMULLve", int_arm_neon_vqdmull>;
4756 multiclass NI_qdma<SDPatternOperator op>
4758 def _4s : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
4760 (v4i32 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>;
4762 def _2d : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
4764 (v2i64 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>;
4767 defm Neon_qdmlal : NI_qdma<int_arm_neon_vqadds>;
4768 defm Neon_qdmlsl : NI_qdma<int_arm_neon_vqsubs>;
4770 multiclass NI_2VEL_v3_qdma_pat<string subop, string op>
4772 def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
4773 !cast<PatFrag>(op # "_4s"), VPR128, VPR64, VPR128Lo,
4774 v4i32, v4i16, v8i16,
4775 BinOpFrag<(Neon_vduplane
4776 (Neon_low8H node:$LHS), node:$RHS)>>;
4778 def : NI_2VE_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
4779 !cast<PatFrag>(op # "_2d"), VPR128, VPR64, VPR128,
4780 v2i64, v2i32, v4i32,
4781 BinOpFrag<(Neon_vduplane
4782 (Neon_low4S node:$LHS), node:$RHS)>>;
4784 def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
4785 !cast<PatFrag>(op # "_4s"), VPR128Lo,
4786 v4i32, v8i16, v8i16, v4i16, Neon_High8H,
4787 BinOpFrag<(Neon_vduplane
4788 (Neon_low8H node:$LHS), node:$RHS)>>;
4790 def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
4791 !cast<PatFrag>(op # "_2d"), VPR128,
4792 v2i64, v4i32, v4i32, v2i32, Neon_High4S,
4793 BinOpFrag<(Neon_vduplane
4794 (Neon_low4S node:$LHS), node:$RHS)>>;
// A 64-bit element vector has half as many lanes as the 128-bit one, so the
// patterns below use a narrower lane-index immediate.
4798 def : NI_2VE_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
4799 !cast<PatFrag>(op # "_4s"), VPR128, VPR64, VPR64Lo,
4800 v4i32, v4i16, v4i16,
4801 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4803 def : NI_2VE_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
4804 !cast<PatFrag>(op # "_2d"), VPR128, VPR64, VPR64,
4805 v2i64, v2i32, v2i32,
4806 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4808 def : NI_2VEL2_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
4809 !cast<PatFrag>(op # "_4s"), VPR64Lo,
4810 v4i32, v8i16, v4i16, v4i16, Neon_High8H,
4811 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4813 def : NI_2VEL2_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
4814 !cast<PatFrag>(op # "_2d"), VPR64,
4815 v2i64, v4i32, v2i32, v2i32, Neon_High4S,
4816 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4819 defm SQDMLAL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLALvve", "Neon_qdmlal">;
4820 defm SQDMLSL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLSLvve", "Neon_qdmlsl">;
4822 // End of implementation for instruction class (3V Elem)
// Insert element (vector, from main, i.e. from a general-purpose register)
4825 def INSbw : NeonI_INS_main<"ins", "b", v16i8, GPR32, i32,
4827 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
4829 def INShw : NeonI_INS_main<"ins", "h", v8i16, GPR32, i32,
4831 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
4833 def INSsw : NeonI_INS_main<"ins", "s", v4i32, GPR32, i32,
4835 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
4837 def INSdx : NeonI_INS_main<"ins", "d", v2i64, GPR64, i64,
4839 let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
4842 class Neon_INS_main_pattern <ValueType ResTy,ValueType ExtResTy,
4843 RegisterClass OpGPR, ValueType OpTy,
4844 Operand OpImm, Instruction INS>
4845 : Pat<(ResTy (vector_insert
4849 (ResTy (EXTRACT_SUBREG
4850 (ExtResTy (INS (ExtResTy (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
4851 OpGPR:$Rn, OpImm:$Imm)), sub_64))>;
4853 def INSbw_pattern : Neon_INS_main_pattern<v8i8, v16i8, GPR32, i32,
4854 neon_uimm3_bare, INSbw>;
4855 def INShw_pattern : Neon_INS_main_pattern<v4i16, v8i16, GPR32, i32,
4856 neon_uimm2_bare, INShw>;
4857 def INSsw_pattern : Neon_INS_main_pattern<v2i32, v4i32, GPR32, i32,
4858 neon_uimm1_bare, INSsw>;
4859 def INSdx_pattern : Neon_INS_main_pattern<v1i64, v2i64, GPR64, i64,
4860 neon_uimm0_bare, INSdx>;
4862 class NeonI_INS_element<string asmop, string Res, ValueType ResTy,
4863 Operand ResImm, ValueType MidTy>
4864 : NeonI_insert<0b1, 0b1,
4865 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn,
4866 ResImm:$Immd, ResImm:$Immn),
4867 asmop # "\t$Rd." # Res # "[$Immd], $Rn." # Res # "[$Immn]",
4868 [(set (ResTy VPR128:$Rd),
4869 (ResTy (vector_insert
4870 (ResTy VPR128:$src),
4871 (MidTy (vector_extract
4876 let Constraints = "$src = $Rd";
// Insert element (vector, from element)
4882 def INSELb : NeonI_INS_element<"ins", "b", v16i8, neon_uimm4_bare, i32> {
4883 let Inst{20-16} = {Immd{3}, Immd{2}, Immd{1}, Immd{0}, 0b1};
4884 let Inst{14-11} = {Immn{3}, Immn{2}, Immn{1}, Immn{0}};
4886 def INSELh : NeonI_INS_element<"ins", "h", v8i16, neon_uimm3_bare, i32> {
4887 let Inst{20-16} = {Immd{2}, Immd{1}, Immd{0}, 0b1, 0b0};
4888 let Inst{14-12} = {Immn{2}, Immn{1}, Immn{0}};
4889 // bit 11 is unspecified.
4891 def INSELs : NeonI_INS_element<"ins", "s", v4i32, neon_uimm2_bare, i32> {
4892 let Inst{20-16} = {Immd{1}, Immd{0}, 0b1, 0b0, 0b0};
4893 let Inst{14-13} = {Immn{1}, Immn{0}};
4894 // bits 11-12 are unspecified.
4896 def INSELd : NeonI_INS_element<"ins", "d", v2i64, neon_uimm1_bare, i64> {
4897 let Inst{20-16} = {Immd, 0b1, 0b0, 0b0, 0b0};
4898 let Inst{14} = Immn{0};
4899 // bits 11-13 are unspecified.
4902 multiclass Neon_INS_elt_float_pattern<ValueType ResTy, ValueType NaTy,
4904 RegisterClass OpFPR, Operand ResImm,
4905 SubRegIndex SubIndex, Instruction INS> {
4906 def : Pat<(ResTy (vector_insert
4907 (ResTy VPR128:$src),
4908 (MidTy (vector_extract
4912 (INS (ResTy VPR128:$src), (ResTy VPR128:$Rn),
4913 ResImm:$Immd, ResImm:$Immn)>;
4915 def : Pat <(ResTy (vector_insert
4916 (ResTy VPR128:$src),
4919 (INS (ResTy VPR128:$src),
4920 (ResTy (SUBREG_TO_REG (i64 0), OpFPR:$Rn, SubIndex)),
4924 def : Pat <(NaTy (vector_insert
4928 (NaTy (EXTRACT_SUBREG
4930 (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)),
4931 (ResTy (SUBREG_TO_REG (i64 0), (MidTy OpFPR:$Rn), SubIndex)),
4937 defm : Neon_INS_elt_float_pattern<v4f32, v2f32, f32, FPR32, neon_uimm2_bare,
4939 defm : Neon_INS_elt_float_pattern<v2f64, v1f64, f64, FPR64, neon_uimm1_bare,
4942 multiclass Neon_INS_elt_pattern <ValueType NaTy, Operand NaImm,
4943 ValueType MidTy, ValueType StTy,
4944 Operand StImm, Instruction INS> {
4945 def : Pat<(NaTy (vector_insert
4947 (MidTy (vector_extract
4951 (NaTy (EXTRACT_SUBREG
4953 (StTy (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
4959 def : Pat<(StTy (vector_insert
4961 (MidTy (vector_extract
4967 (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
4971 def : Pat<(NaTy (vector_insert
4973 (MidTy (vector_extract
4977 (NaTy (EXTRACT_SUBREG
4979 (StTy (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
4980 (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
4986 defm : Neon_INS_elt_pattern<v8i8, neon_uimm3_bare, i32,
4987 v16i8, neon_uimm4_bare, INSELb>;
4988 defm : Neon_INS_elt_pattern<v4i16, neon_uimm2_bare, i32,
4989 v8i16, neon_uimm3_bare, INSELh>;
4990 defm : Neon_INS_elt_pattern<v2i32, neon_uimm1_bare, i32,
4991 v4i32, neon_uimm2_bare, INSELs>;
4992 defm : Neon_INS_elt_pattern<v1i64, neon_uimm0_bare, i64,
4993 v2i64, neon_uimm1_bare, INSELd>;
4996 class NeonI_SMOV<string asmop, string Res, bit Q,
4997 ValueType OpTy, ValueType eleTy,
4998 Operand OpImm, RegisterClass ResGPR, ValueType ResTy>
4999 : NeonI_copy<Q, 0b0, 0b0101,
5000 (outs ResGPR:$Rd), (ins VPR128:$Rn, OpImm:$Imm),
5001 asmop # "\t$Rd, $Rn." # Res # "[$Imm]",
5002 [(set (ResTy ResGPR:$Rd),
5004 (ResTy (vector_extract
5005 (OpTy VPR128:$Rn), (OpImm:$Imm))),
// Signed integer move (main, from element): sign-extending move of a vector
// element to a general-purpose register
5012 def SMOVwb : NeonI_SMOV<"smov", "b", 0b0, v16i8, i8, neon_uimm4_bare,
5014 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
5016 def SMOVwh : NeonI_SMOV<"smov", "h", 0b0, v8i16, i16, neon_uimm3_bare,
5018 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
5020 def SMOVxb : NeonI_SMOV<"smov", "b", 0b1, v16i8, i8, neon_uimm4_bare,
5022 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
5024 def SMOVxh : NeonI_SMOV<"smov", "h", 0b1, v8i16, i16, neon_uimm3_bare,
5026 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
5028 def SMOVxs : NeonI_SMOV<"smov", "s", 0b1, v4i32, i32, neon_uimm2_bare,
5030 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
5033 multiclass Neon_SMOVx_pattern <ValueType StTy, ValueType NaTy,
5034 ValueType eleTy, Operand StImm, Operand NaImm,
5035 Instruction SMOVI> {
5036 def : Pat<(i64 (sext_inreg
5038 (i32 (vector_extract
5039 (StTy VPR128:$Rn), (StImm:$Imm))))),
5041 (SMOVI VPR128:$Rn, StImm:$Imm)>;
5043 def : Pat<(i64 (sext
5044 (i32 (vector_extract
5045 (StTy VPR128:$Rn), (StImm:$Imm))))),
5046 (SMOVI VPR128:$Rn, StImm:$Imm)>;
5048 def : Pat<(i64 (sext_inreg
5049 (i64 (vector_extract
5050 (NaTy VPR64:$Rn), (NaImm:$Imm))),
5052 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
5055 def : Pat<(i64 (sext_inreg
5057 (i32 (vector_extract
5058 (NaTy VPR64:$Rn), (NaImm:$Imm))))),
5060 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
5063 def : Pat<(i64 (sext
5064 (i32 (vector_extract
5065 (NaTy VPR64:$Rn), (NaImm:$Imm))))),
5066 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
5070 defm : Neon_SMOVx_pattern<v16i8, v8i8, i8, neon_uimm4_bare,
5071 neon_uimm3_bare, SMOVxb>;
5072 defm : Neon_SMOVx_pattern<v8i16, v4i16, i16, neon_uimm3_bare,
5073 neon_uimm2_bare, SMOVxh>;
5074 defm : Neon_SMOVx_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
5075 neon_uimm1_bare, SMOVxs>;
5077 class Neon_SMOVw_pattern <ValueType StTy, ValueType NaTy,
5078 ValueType eleTy, Operand StImm, Operand NaImm,
5080 : Pat<(i32 (sext_inreg
5081 (i32 (vector_extract
5082 (NaTy VPR64:$Rn), (NaImm:$Imm))),
5084 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
5087 def : Neon_SMOVw_pattern<v16i8, v8i8, i8, neon_uimm4_bare,
5088 neon_uimm3_bare, SMOVwb>;
5089 def : Neon_SMOVw_pattern<v8i16, v4i16, i16, neon_uimm3_bare,
5090 neon_uimm2_bare, SMOVwh>;
5092 class NeonI_UMOV<string asmop, string Res, bit Q,
5093 ValueType OpTy, Operand OpImm,
5094 RegisterClass ResGPR, ValueType ResTy>
5095 : NeonI_copy<Q, 0b0, 0b0111,
5096 (outs ResGPR:$Rd), (ins VPR128:$Rn, OpImm:$Imm),
5097 asmop # "\t$Rd, $Rn." # Res # "[$Imm]",
5098 [(set (ResTy ResGPR:$Rd),
5099 (ResTy (vector_extract
5100 (OpTy VPR128:$Rn), (OpImm:$Imm))))],
// Unsigned integer move (main, from element): move of a vector element to a
// general-purpose register
5106 def UMOVwb : NeonI_UMOV<"umov", "b", 0b0, v16i8, neon_uimm4_bare,
5108 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
5110 def UMOVwh : NeonI_UMOV<"umov", "h", 0b0, v8i16, neon_uimm3_bare,
5112 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
5114 def UMOVws : NeonI_UMOV<"umov", "s", 0b0, v4i32, neon_uimm2_bare,
5116 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
5118 def UMOVxd : NeonI_UMOV<"umov", "d", 0b1, v2i64, neon_uimm1_bare,
5120 let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
5123 class Neon_UMOV_pattern <ValueType StTy, ValueType NaTy, ValueType ResTy,
5124 Operand StImm, Operand NaImm,
5126 : Pat<(ResTy (vector_extract
5127 (NaTy VPR64:$Rn), NaImm:$Imm)),
5128 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
5131 def : Neon_UMOV_pattern<v16i8, v8i8, i32, neon_uimm4_bare,
5132 neon_uimm3_bare, UMOVwb>;
5133 def : Neon_UMOV_pattern<v8i16, v4i16, i32, neon_uimm3_bare,
5134 neon_uimm2_bare, UMOVwh>;
5135 def : Neon_UMOV_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
5136 neon_uimm1_bare, UMOVws>;
5139 (i32 (vector_extract
5140 (v16i8 VPR128:$Rn), (neon_uimm4_bare:$Imm))),
5142 (UMOVwb VPR128:$Rn, neon_uimm4_bare:$Imm)>;
5145 (i32 (vector_extract
5146 (v8i16 VPR128:$Rn), (neon_uimm3_bare:$Imm))),
5148 (UMOVwh VPR128:$Rn, neon_uimm3_bare:$Imm)>;
5150 def : Pat<(i64 (zext
5151 (i32 (vector_extract
5152 (v2i64 VPR128:$Rn), (neon_uimm1_bare:$Imm))))),
5153 (UMOVxd VPR128:$Rn, neon_uimm1_bare:$Imm)>;
5156 (i32 (vector_extract
5157 (v8i8 VPR64:$Rn), (neon_uimm3_bare:$Imm))),
5159 (UMOVwb (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
5160 neon_uimm3_bare:$Imm)>;
5163 (i32 (vector_extract
5164 (v4i16 VPR64:$Rn), (neon_uimm2_bare:$Imm))),
5166 (UMOVwh (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
5167 neon_uimm2_bare:$Imm)>;
5169 def : Pat<(i64 (zext
5170 (i32 (vector_extract
5171 (v1i64 VPR64:$Rn), (neon_uimm0_bare:$Imm))))),
5172 (UMOVxd (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
5173 neon_uimm0_bare:$Imm)>;
5175 // Additional copy patterns for scalar types
5176 def : Pat<(i32 (vector_extract (v1i8 FPR8:$Rn), (i64 0))),
5178 (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8)), (i64 0))>;
5180 def : Pat<(i32 (vector_extract (v1i16 FPR16:$Rn), (i64 0))),
5182 (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16)), (i64 0))>;
5184 def : Pat<(i32 (vector_extract (v1i32 FPR32:$Rn), (i64 0))),
5185 (FMOVws FPR32:$Rn)>;
5187 def : Pat<(i64 (vector_extract (v1i64 FPR64:$Rn), (i64 0))),
5188 (FMOVxd FPR64:$Rn)>;
5190 def : Pat<(f64 (vector_extract (v1f64 FPR64:$Rn), (i64 0))),
5193 def : Pat<(f32 (vector_extract (v1f32 FPR32:$Rn), (i64 0))),
5196 def : Pat<(v1i8 (scalar_to_vector GPR32:$Rn)),
5197 (v1i8 (EXTRACT_SUBREG (v16i8
5198 (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))),
5201 def : Pat<(v1i16 (scalar_to_vector GPR32:$Rn)),
5202 (v1i16 (EXTRACT_SUBREG (v8i16
5203 (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))),
5206 def : Pat<(v1i32 (scalar_to_vector GPR32:$src)),
5209 def : Pat<(v1i64 (scalar_to_vector GPR64:$src)),
5212 def : Pat<(v1f32 (scalar_to_vector (f32 FPR32:$Rn))),
5214 def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Rn))),
5217 def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$src))),
5220 def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$src))),
5221 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)),
5222 (f64 FPR64:$src), sub_64)>;
5224 class NeonI_DUP_Elt<bit Q, string asmop, string rdlane, string rnlane,
5225 RegisterOperand ResVPR, ValueType ResTy,
5226 ValueType OpTy, Operand OpImm>
5227 : NeonI_copy<Q, 0b0, 0b0000, (outs ResVPR:$Rd),
5228 (ins VPR128:$Rn, OpImm:$Imm),
5229 asmop # "\t$Rd" # rdlane # ", $Rn" # rnlane # "[$Imm]",
5235 def DUPELT16b : NeonI_DUP_Elt<0b1, "dup", ".16b", ".b", VPR128, v16i8, v16i8,
5237 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
5240 def DUPELT8h : NeonI_DUP_Elt<0b1, "dup", ".8h", ".h", VPR128, v8i16, v8i16,
5242 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
5245 def DUPELT4s : NeonI_DUP_Elt<0b1, "dup", ".4s", ".s", VPR128, v4i32, v4i32,
5247 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
5250 def DUPELT2d : NeonI_DUP_Elt<0b1, "dup", ".2d", ".d", VPR128, v2i64, v2i64,
5252 let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
5255 def DUPELT8b : NeonI_DUP_Elt<0b0, "dup", ".8b", ".b", VPR64, v8i8, v16i8,
5257 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
5260 def DUPELT4h : NeonI_DUP_Elt<0b0, "dup", ".4h", ".h", VPR64, v4i16, v8i16,
5262 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
5265 def DUPELT2s : NeonI_DUP_Elt<0b0, "dup", ".2s", ".s", VPR64, v2i32, v4i32,
5267 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
5270 multiclass NeonI_DUP_Elt_pattern<Instruction DUPELT, ValueType ResTy,
5271 ValueType OpTy,ValueType NaTy,
5272 ValueType ExTy, Operand OpLImm,
5274 def : Pat<(ResTy (Neon_vduplane (OpTy VPR128:$Rn), OpLImm:$Imm)),
5275 (ResTy (DUPELT (OpTy VPR128:$Rn), OpLImm:$Imm))>;
5277 def : Pat<(ResTy (Neon_vduplane
5278 (NaTy VPR64:$Rn), OpNImm:$Imm)),
5280 (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), OpNImm:$Imm))>;
5282 defm : NeonI_DUP_Elt_pattern<DUPELT16b, v16i8, v16i8, v8i8, v16i8,
5283 neon_uimm4_bare, neon_uimm3_bare>;
5284 defm : NeonI_DUP_Elt_pattern<DUPELT8b, v8i8, v16i8, v8i8, v16i8,
5285 neon_uimm4_bare, neon_uimm3_bare>;
5286 defm : NeonI_DUP_Elt_pattern<DUPELT8h, v8i16, v8i16, v4i16, v8i16,
5287 neon_uimm3_bare, neon_uimm2_bare>;
5288 defm : NeonI_DUP_Elt_pattern<DUPELT4h, v4i16, v8i16, v4i16, v8i16,
5289 neon_uimm3_bare, neon_uimm2_bare>;
5290 defm : NeonI_DUP_Elt_pattern<DUPELT4s, v4i32, v4i32, v2i32, v4i32,
5291 neon_uimm2_bare, neon_uimm1_bare>;
5292 defm : NeonI_DUP_Elt_pattern<DUPELT2s, v2i32, v4i32, v2i32, v4i32,
5293 neon_uimm2_bare, neon_uimm1_bare>;
5294 defm : NeonI_DUP_Elt_pattern<DUPELT2d, v2i64, v2i64, v1i64, v2i64,
5295 neon_uimm1_bare, neon_uimm0_bare>;
5296 defm : NeonI_DUP_Elt_pattern<DUPELT4s, v4f32, v4f32, v2f32, v4f32,
5297 neon_uimm2_bare, neon_uimm1_bare>;
5298 defm : NeonI_DUP_Elt_pattern<DUPELT2s, v2f32, v4f32, v2f32, v4f32,
5299 neon_uimm2_bare, neon_uimm1_bare>;
5300 defm : NeonI_DUP_Elt_pattern<DUPELT2d, v2f64, v2f64, v1f64, v2f64,
5301 neon_uimm1_bare, neon_uimm0_bare>;
5303 def : Pat<(v2f32 (Neon_vdup (f32 FPR32:$Rn))),
5305 (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
5307 def : Pat<(v4f32 (Neon_vdup (f32 FPR32:$Rn))),
5309 (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
5311 def : Pat<(v2f64 (Neon_vdup (f64 FPR64:$Rn))),
5313 (SUBREG_TO_REG (i64 0), FPR64:$Rn, sub_64),
// Base class for "dup" instructions whose source is a scalar register of
// class OpGPR and whose result is a vector in ResVPR.
//   Q            - forwarded as the first NeonI_copy argument.
//   asmop        - assembly mnemonic.
//   rdlane       - suffix appended directly after $Rd in the asm string.
//   ResVPR/ResTy - result register operand and its value type.
//   OpGPR/OpTy   - source register class and its value type.
// The asm string is "<asmop>\t$Rd<rdlane>, $Rn"; the selection pattern sets
// $Rd to Neon_vdup of $Rn.
5316 class NeonI_DUP<bit Q, string asmop, string rdlane,
5317 RegisterOperand ResVPR, ValueType ResTy,
5318 RegisterClass OpGPR, ValueType OpTy>
5319 : NeonI_copy<Q, 0b0, 0b0001, (outs ResVPR:$Rd), (ins OpGPR:$Rn),
5320 asmop # "\t$Rd" # rdlane # ", $Rn",
5321 [(set (ResTy ResVPR:$Rd),
5322 (ResTy (Neon_vdup (OpTy OpGPR:$Rn))))],
// NeonI_DUP instantiations, one per arrangement. The VPR128 forms (.16b, .8h,
// .4s, .2d) pass 0b1 as Q and the VPR64 forms (.8b, .4h, .2s) pass 0b0. The
// .2d form reads a GPR64 source typed i64; every other form reads a GPR32
// source typed i32. The visible `let Inst{...}` lines fix the low bits of
// Inst{19-16} per element size (0b10 for h, 0b100 for s, 0b1000 for d); the
// inline notes record which higher bits of that field are left unspecified.
5325 def DUP16b : NeonI_DUP<0b1, "dup", ".16b", VPR128, v16i8, GPR32, i32> {
5327 // bits 17-19 are unspecified.
5330 def DUP8h : NeonI_DUP<0b1, "dup", ".8h", VPR128, v8i16, GPR32, i32> {
5331 let Inst{17-16} = 0b10;
5332 // bits 18-19 are unspecified.
5335 def DUP4s : NeonI_DUP<0b1, "dup", ".4s", VPR128, v4i32, GPR32, i32> {
5336 let Inst{18-16} = 0b100;
5337 // bit 19 is unspecified.
5340 def DUP2d : NeonI_DUP<0b1, "dup", ".2d", VPR128, v2i64, GPR64, i64> {
5341 let Inst{19-16} = 0b1000;
5344 def DUP8b : NeonI_DUP<0b0, "dup", ".8b", VPR64, v8i8, GPR32, i32> {
5346 // bits 17-19 are unspecified.
5349 def DUP4h : NeonI_DUP<0b0, "dup", ".4h", VPR64, v4i16, GPR32, i32> {
5350 let Inst{17-16} = 0b10;
5351 // bits 18-19 are unspecified.
5354 def DUP2s : NeonI_DUP<0b0, "dup", ".2s", VPR64, v2i32, GPR32, i32> {
5355 let Inst{18-16} = 0b100;
5356 // bit 19 is unspecified.
5359 // patterns for CONCAT_VECTORS
// Selection patterns for concat_vectors of two OpTy operands held in VPR64,
// producing a ResTy value. Three operand shapes are matched:
//   1. ($Rn, undef): $Rn is wrapped in SUBREG_TO_REG (sub_64) and nothing else
//      is done.
//   2. ($Rn, $Rm):   both operands are wrapped in SUBREG_TO_REG (sub_64) and
//      typed v2i64.
//   3. ($Rn, $Rn):   the single operand is wrapped once and typed v2i64.
// NOTE(review): the instructions that consume the widened operands in cases 2
// and 3 are not visible on these lines -- confirm against the complete
// definition.
5360 multiclass Concat_Vector_Pattern<ValueType ResTy, ValueType OpTy> {
5361 def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), undef)),
5362 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)>;
5363 def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))),
5365 (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
5366 (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rm, sub_64)),
5369 def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), (OpTy VPR64:$Rn))),
5371 (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
// Instantiate the concat_vectors patterns for each (result type, operand type)
// pair: every result has twice as many elements as the VPR64 operand type it
// is built from. Integer pairs come first, then floating-point pairs.
5375 defm : Concat_Vector_Pattern<v16i8, v8i8>;
5376 defm : Concat_Vector_Pattern<v8i16, v4i16>;
5377 defm : Concat_Vector_Pattern<v4i32, v2i32>;
5378 defm : Concat_Vector_Pattern<v2i64, v1i64>;
5379 defm : Concat_Vector_Pattern<v4f32, v2f32>;
5380 defm : Concat_Vector_Pattern<v2f64, v1f64>;
5382 // Patterns for EXTRACT_SUBVECTOR
// extract_subvector at index 0 from a vector in VPR128 is selected as
// EXTRACT_SUBREG of the sub_64 subregister. Each pattern pairs a source type
// with the result type that has half as many elements of the same element
// type. Only index (i64 0) is matched by these patterns.
5383 def : Pat<(v8i8 (extract_subvector (v16i8 VPR128:$Rn), (i64 0))),
5384 (v8i8 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
5385 def : Pat<(v4i16 (extract_subvector (v8i16 VPR128:$Rn), (i64 0))),
5386 (v4i16 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
5387 def : Pat<(v2i32 (extract_subvector (v4i32 VPR128:$Rn), (i64 0))),
5388 (v2i32 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
5389 def : Pat<(v1i64 (extract_subvector (v2i64 VPR128:$Rn), (i64 0))),
5390 (v1i64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
5391 def : Pat<(v2f32 (extract_subvector (v4f32 VPR128:$Rn), (i64 0))),
5392 (v2f32 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
5393 def : Pat<(v1f64 (extract_subvector (v2f64 VPR128:$Rn), (i64 0))),
5394 (v1f64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;