//===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the ARM NEON instruction set.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// NEON-specific Operands.
//===----------------------------------------------------------------------===//
// NEON modified-immediate operands. All share the same printer; each splat
// width / vmov variant has its own asm-parser match class.
def nModImm : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
}

def nImmSplatI8AsmOperand : AsmOperandClass { let Name = "NEONi8splat"; }
def nImmSplatI8 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI8AsmOperand;
}
def nImmSplatI16AsmOperand : AsmOperandClass { let Name = "NEONi16splat"; }
def nImmSplatI16 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI16AsmOperand;
}
def nImmSplatI32AsmOperand : AsmOperandClass { let Name = "NEONi32splat"; }
def nImmSplatI32 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI32AsmOperand;
}
def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; }
def nImmVMOVI32 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMOVI32AsmOperand;
}
def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; }
def nImmVMOVI32Neg : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMOVI32NegAsmOperand;
}
def nImmVMOVF32 : Operand<i32> {
  let PrintMethod = "printFPImmOperand";
  let ParserMatchClass = FPImmOperand;
}
def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; }
def nImmSplatI64 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI64AsmOperand;
}
// Vector lane-index operands. The ImmLeaf predicate bounds the index by the
// number of lanes for the element size (8 lanes of i8, 4 of i16, 2 of i32
// per D register).
def VectorIndex8Operand  : AsmOperandClass { let Name = "VectorIndex8"; }
def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; }
def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; }
def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 8;
}]> {
  let ParserMatchClass = VectorIndex8Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex16 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 4;
}]> {
  let ParserMatchClass = VectorIndex16Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 2;
}]> {
  let ParserMatchClass = VectorIndex32Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
// Register list of one D register.
def VecListOneDAsmOperand : AsmOperandClass {
  let Name = "VecListOneD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
  let ParserMatchClass = VecListOneDAsmOperand;
}
// Register list of two sequential D registers.
def VecListDPairAsmOperand : AsmOperandClass {
  let Name = "VecListDPair";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> {
  let ParserMatchClass = VecListDPairAsmOperand;
}
// Register list of three sequential D registers.
def VecListThreeDAsmOperand : AsmOperandClass {
  let Name = "VecListThreeD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
  let ParserMatchClass = VecListThreeDAsmOperand;
}
// Register list of four sequential D registers.
def VecListFourDAsmOperand : AsmOperandClass {
  let Name = "VecListFourD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
  let ParserMatchClass = VecListFourDAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers).
def VecListDPairSpacedAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpaced";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListTwoSpaced"> {
  let ParserMatchClass = VecListDPairSpacedAsmOperand;
}
// Register list of three D registers spaced by 2 (three Q registers).
def VecListThreeQAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeQ : RegisterOperand<DPR, "printVectorListThreeSpaced"> {
  let ParserMatchClass = VecListThreeQAsmOperand;
}
// Register list of four D registers spaced by 2 (four Q registers).
def VecListFourQAsmOperand : AsmOperandClass {
  let Name = "VecListFourQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourQ : RegisterOperand<DPR, "printVectorListFourSpaced"> {
  let ParserMatchClass = VecListFourQAsmOperand;
}
// Register list of one D register, with "all lanes" subscripting.
def VecListOneDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListOneDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
  let ParserMatchClass = VecListOneDAllLanesAsmOperand;
}
// Register list of two D registers, with "all lanes" subscripting.
def VecListDPairAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairAllLanes : RegisterOperand<DPair,
                                           "printVectorListTwoAllLanes"> {
  let ParserMatchClass = VecListDPairAllLanesAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers).
def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpacedAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairSpacedAllLanes : RegisterOperand<DPair,
                                            "printVectorListTwoSpacedAllLanes"> {
  let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand;
}
// Register list of three D registers, with "all lanes" subscripting.
def VecListThreeDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeDAllLanes : RegisterOperand<DPR,
                                            "printVectorListThreeAllLanes"> {
  let ParserMatchClass = VecListThreeDAllLanesAsmOperand;
}
// Register list of three D registers spaced by 2 (three sequential Q regs).
def VecListThreeQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeQAllLanes : RegisterOperand<DPR,
                                            "printVectorListThreeSpacedAllLanes"> {
  let ParserMatchClass = VecListThreeQAllLanesAsmOperand;
}
// Register list of four D registers, with "all lanes" subscripting.
def VecListFourDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourDAllLanes : RegisterOperand<DPR, "printVectorListFourAllLanes"> {
  let ParserMatchClass = VecListFourDAllLanesAsmOperand;
}
// Register list of four D registers spaced by 2 (four sequential Q regs).
def VecListFourQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourQAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourQAllLanes : RegisterOperand<DPR,
                                           "printVectorListFourSpacedAllLanes"> {
  let ParserMatchClass = VecListFourQAllLanesAsmOperand;
}
// Register list of one D register, with byte lane subscripting.
def VecListOneDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListOneDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListOneDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of two D registers with byte lane subscripting.
def VecListTwoDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListTwoDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListTwoDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of two Q registers with half-word lane subscripting.
def VecListTwoQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListTwoQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of three D registers with byte lane subscripting.
def VecListThreeDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListThreeDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListThreeDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of three Q registers with half-word lane subscripting.
def VecListThreeQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListThreeQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of four D registers with byte lane subscripting.
def VecListFourDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListFourDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListFourDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of four Q registers with half-word lane subscripting.
def VecListFourQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListFourQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// PatFrags selecting loads/stores by their recorded alignment, used to pick
// the NEON instruction alignment encodings.
def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 2;
}]>;
def hword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                 (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 2;
}]>;
def byte_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 1;
}]>;
def byte_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 1;
}]>;
def non_word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() < 4;
}]>;
def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                    (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() < 4;
}]>;
//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//

// Vector comparisons: two-operand form and compare-against-zero form.
def SDTARMVCMP    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
def SDTARMVCMPZ   : SDTypeProfile<1, 1, []>;

def NEONvceq      : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
def NEONvceqz     : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>;
def NEONvcge      : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
def NEONvcgez     : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>;
def NEONvclez     : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>;
def NEONvcgeu     : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
def NEONvcgt      : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
def NEONvcgtz     : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>;
def NEONvcltz     : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
def NEONvcgtu     : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
def NEONvtst      : SDNode<"ARMISD::VTST", SDTARMVCMP>;

// Types for vector shift by immediates. The "SHX" version is for long and
// narrow operations where the source and destination vectors have different
// types. The "SHINS" version is for shift and insert operations.
def SDTARMVSH     : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                         SDTCisVT<2, i32>]>;
def SDTARMVSHX    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                         SDTCisVT<2, i32>]>;
def SDTARMVSHINS  : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;

def NEONvshl      : SDNode<"ARMISD::VSHL", SDTARMVSH>;
def NEONvshrs     : SDNode<"ARMISD::VSHRs", SDTARMVSH>;
def NEONvshru     : SDNode<"ARMISD::VSHRu", SDTARMVSH>;
def NEONvshlls    : SDNode<"ARMISD::VSHLLs", SDTARMVSHX>;
def NEONvshllu    : SDNode<"ARMISD::VSHLLu", SDTARMVSHX>;
def NEONvshlli    : SDNode<"ARMISD::VSHLLi", SDTARMVSHX>;
def NEONvshrn     : SDNode<"ARMISD::VSHRN", SDTARMVSHX>;

def NEONvrshrs    : SDNode<"ARMISD::VRSHRs", SDTARMVSH>;
def NEONvrshru    : SDNode<"ARMISD::VRSHRu", SDTARMVSH>;
def NEONvrshrn    : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>;

def NEONvqshls    : SDNode<"ARMISD::VQSHLs", SDTARMVSH>;
def NEONvqshlu    : SDNode<"ARMISD::VQSHLu", SDTARMVSH>;
def NEONvqshlsu   : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>;
def NEONvqshrns   : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>;
def NEONvqshrnu   : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>;
def NEONvqshrnsu  : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>;

def NEONvqrshrns  : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>;
def NEONvqrshrnu  : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>;
def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>;

def NEONvsli      : SDNode<"ARMISD::VSLI", SDTARMVSHINS>;
def NEONvsri      : SDNode<"ARMISD::VSRI", SDTARMVSHINS>;

def SDTARMVGETLN  : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
                                         SDTCisVT<2, i32>]>;
def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;

def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
def NEONvmovImm   : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
def NEONvmvnImm   : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
def NEONvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>;

def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisVT<2, i32>]>;
def NEONvorrImm   : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
def NEONvbicImm   : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;

def NEONvbsl      : SDNode<"ARMISD::VBSL",
                           SDTypeProfile<1, 3, [SDTCisVec<0>,
                                                SDTCisSameAs<0, 1>,
                                                SDTCisSameAs<0, 2>,
                                                SDTCisSameAs<0, 3>]>>;

def NEONvdup      : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;

// VDUPLANE can produce a quad-register result from a double-register source,
// so the result is not constrained to match the source.
def NEONvduplane  : SDNode<"ARMISD::VDUPLANE",
                           SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
                                                SDTCisVT<2, i32>]>>;

def SDTARMVEXT    : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
def NEONvext      : SDNode<"ARMISD::VEXT", SDTARMVEXT>;

def SDTARMVSHUF   : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
def NEONvrev64    : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
def NEONvrev32    : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
def NEONvrev16    : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;

def SDTARMVSHUF2  : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>,
                                         SDTCisSameAs<0, 3>]>;
def NEONzip       : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
def NEONuzp       : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn       : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;

def SDTARMVMULL   : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                         SDTCisSameAs<1, 2>]>;
def NEONvmulls    : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
def NEONvmullu    : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;

def SDTARMFMAX    : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>]>;
def NEONfmax      : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
def NEONfmin      : SDNode<"ARMISD::FMIN", SDTARMFMAX>;
// All-zeros / all-ones vector constants, matched as VMOVIMM nodes whose
// modified-immediate encoding decodes to 0 (32-bit elements) or 0xff
// (8-bit elements) respectively.
def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
  unsigned EltBits = 0;
  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
  return (EltBits == 32 && EltVal == 0);
}]>;

def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
  unsigned EltBits = 0;
  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
  return (EltBits == 8 && EltVal == 0xff);
}]>;
//===----------------------------------------------------------------------===//
// NEON load / store instructions
//===----------------------------------------------------------------------===//

// Use VLDM to load a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
def VLDMQIA
  : PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn),
                   IIC_fpLoad_m, "",
                   [(set DPair:$dst, (v2f64 (load GPR:$Rn)))]>;

// Use VSTM to store a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
def VSTMQIA
  : PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn),
                   IIC_fpStore_m, "",
                   [(store (v2f64 DPair:$src), GPR:$Rn)]>;
// Classes for VLD* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
class VLDQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset), itin,
                "$addr.addr = $wb">;
class VLDQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr), itin,
                "$addr.addr = $wb">;
class VLDQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset), itin,
                "$addr.addr = $wb">;

class VLDQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset), itin,
                "$addr.addr = $wb">;
class VLDQQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr), itin,
                "$addr.addr = $wb">;
class VLDQQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset), itin,
                "$addr.addr = $wb">;

class VLDQQQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin,
                "$src = $dst">;
class VLDQQQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
                "$addr.addr = $wb, $src = $dst">;
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {

// VLD1 : Vector Load (multiple single elements)
class VLD1D<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd),
          (ins addrmode6:$Rn), IIC_VLD1,
          "vld1", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDInstruction";
}
class VLD1Q<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd),
          (ins addrmode6:$Rn), IIC_VLD1x2,
          "vld1", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDInstruction";
}

def  VLD1d8   : VLD1D<{0,0,0,?}, "8">;
def  VLD1d16  : VLD1D<{0,1,0,?}, "16">;
def  VLD1d32  : VLD1D<{1,0,0,?}, "32">;
def  VLD1d64  : VLD1D<{1,1,0,?}, "64">;

def  VLD1q8   : VLD1Q<{0,0,?,?}, "8">;
def  VLD1q16  : VLD1Q<{0,1,?,?}, "16">;
def  VLD1q32  : VLD1Q<{1,0,?,?}, "32">;
def  VLD1q64  : VLD1Q<{1,1,?,?}, "64">;
// ...with address register writeback:
multiclass VLD1DWB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
                     (ins addrmode6:$Rn), IIC_VLD1u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbFixed";
  }
  def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbRegister";
  }
}
multiclass VLD1QWB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
                     (ins addrmode6:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbFixed";
  }
  def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbRegister";
  }
}

defm VLD1d8wb  : VLD1DWB<{0,0,0,?}, "8">;
defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16">;
defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32">;
defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64">;
defm VLD1q8wb  : VLD1QWB<{0,0,?,?}, "8">;
defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16">;
defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32">;
defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64">;
// ...with 3 registers
class VLD1D3<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd),
          (ins addrmode6:$Rn), IIC_VLD1x3, "vld1", Dt,
          "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDInstruction";
}
multiclass VLD1D3WB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
                     (ins addrmode6:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbFixed";
  }
  def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbRegister";
  }
}

def VLD1d8T  : VLD1D3<{0,0,0,?}, "8">;
def VLD1d16T : VLD1D3<{0,1,0,?}, "16">;
def VLD1d32T : VLD1D3<{1,0,0,?}, "32">;
def VLD1d64T : VLD1D3<{1,1,0,?}, "64">;

defm VLD1d8Twb  : VLD1D3WB<{0,0,0,?}, "8">;
defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16">;
defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32">;
defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64">;

def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>;
// ...with 4 registers
class VLD1D4<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd),
          (ins addrmode6:$Rn), IIC_VLD1x4, "vld1", Dt,
          "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDInstruction";
}
multiclass VLD1D4WB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb),
                     (ins addrmode6:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbFixed";
  }
  def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbRegister";
  }
}

def VLD1d8Q  : VLD1D4<{0,0,?,?}, "8">;
def VLD1d16Q : VLD1D4<{0,1,?,?}, "16">;
def VLD1d32Q : VLD1D4<{1,0,?,?}, "32">;
def VLD1d64Q : VLD1D4<{1,1,?,?}, "64">;

defm VLD1d8Qwb  : VLD1D4WB<{0,0,?,?}, "8">;
defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16">;
defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32">;
defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64">;

def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>;
// VLD2 : Vector Load (multiple 2-element structures)
class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
           InstrItinClass itin>
  : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd),
          (ins addrmode6:$Rn), itin,
          "vld2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDInstruction";
}

def  VLD2d8   : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2>;
def  VLD2d16  : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2>;
def  VLD2d32  : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2>;

def  VLD2q8   : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2>;
def  VLD2q16  : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2>;
def  VLD2q32  : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2>;

def  VLD2q8Pseudo  : VLDQQPseudo<IIC_VLD2x2>;
def  VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>;
def  VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>;
// ...with address register writeback:
multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
                  RegisterOperand VdTy, InstrItinClass itin> {
  def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
                     (ins addrmode6:$Rn), itin,
                     "vld2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbFixed";
  }
  def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm), itin,
                        "vld2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbRegister";
  }
}

defm VLD2d8wb  : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u>;
defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u>;
defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u>;

defm VLD2q8wb  : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u>;
defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u>;
defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u>;

def VLD2q8PseudoWB_fixed     : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q16PseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q32PseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q8PseudoWB_register  : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;

// ...with double-spaced registers
def VLD2b8    : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2>;
def VLD2b16   : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2>;
def VLD2b32   : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2>;
defm VLD2b8wb  : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u>;
defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u>;
defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u>;
835 // VLD3 : Vector Load (multiple 3-element structures)
// Loads three D registers; the all-lanes register list is printed inline
// via the "\\{...\\}" asm string rather than a VecList operand.
836 class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
837   : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
838           (ins addrmode6:$Rn), IIC_VLD3,
839           "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> {
842   let DecoderMethod = "DecodeVLDInstruction";
845 def  VLD3d8   : VLD3D<0b0100, {0,0,0,?}, "8">;
846 def  VLD3d16  : VLD3D<0b0100, {0,1,0,?}, "16">;
847 def  VLD3d32  : VLD3D<0b0100, {1,0,0,?}, "32">;
849 def  VLD3d8Pseudo  : VLDQQPseudo<IIC_VLD3>;
850 def  VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>;
851 def  VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>;
853 // ...with address register writeback:
854 class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
855   : NLdSt<0, 0b10, op11_8, op7_4,
856           (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
857           (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u,
858           "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
859           "$Rn.addr = $wb", []> {
861   let DecoderMethod = "DecodeVLDInstruction";
864 def VLD3d8_UPD  : VLD3DWB<0b0100, {0,0,0,?}, "8">;
865 def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">;
866 def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">;
868 def VLD3d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3u>;
869 def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
870 def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
872 // ...with double-spaced registers:
873 def VLD3q8   : VLD3D<0b0101, {0,0,0,?}, "8">;
874 def VLD3q16  : VLD3D<0b0101, {0,1,0,?}, "16">;
875 def VLD3q32  : VLD3D<0b0101, {1,0,0,?}, "32">;
876 def VLD3q8_UPD  : VLD3DWB<0b0101, {0,0,0,?}, "8">;
877 def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">;
878 def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">;
880 def VLD3q8Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>;
881 def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
882 def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
884 // ...alternate versions to be allocated odd register numbers:
885 def VLD3q8oddPseudo   : VLDQQQQPseudo<IIC_VLD3>;
886 def VLD3q16oddPseudo  : VLDQQQQPseudo<IIC_VLD3>;
887 def VLD3q32oddPseudo  : VLDQQQQPseudo<IIC_VLD3>;
889 def VLD3q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>;
890 def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
891 def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
893 // VLD4 : Vector Load (multiple 4-element structures)
// Loads four D registers; structure mirrors VLD3D above with one extra
// destination register.
894 class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
895   : NLdSt<0, 0b10, op11_8, op7_4,
896           (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
897           (ins addrmode6:$Rn), IIC_VLD4,
898           "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
900   let Inst{5-4} = Rn{5-4};
901   let DecoderMethod = "DecodeVLDInstruction";
904 def  VLD4d8   : VLD4D<0b0000, {0,0,?,?}, "8">;
905 def  VLD4d16  : VLD4D<0b0000, {0,1,?,?}, "16">;
906 def  VLD4d32  : VLD4D<0b0000, {1,0,?,?}, "32">;
908 def  VLD4d8Pseudo  : VLDQQPseudo<IIC_VLD4>;
909 def  VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>;
910 def  VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>;
912 // ...with address register writeback:
913 class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
914   : NLdSt<0, 0b10, op11_8, op7_4,
915           (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
916           (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u,
917           "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
918           "$Rn.addr = $wb", []> {
919   let Inst{5-4} = Rn{5-4};
920   let DecoderMethod = "DecodeVLDInstruction";
923 def VLD4d8_UPD  : VLD4DWB<0b0000, {0,0,?,?}, "8">;
924 def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">;
925 def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">;
927 def VLD4d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4u>;
928 def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
929 def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
931 // ...with double-spaced registers:
932 def VLD4q8   : VLD4D<0b0001, {0,0,?,?}, "8">;
933 def VLD4q16  : VLD4D<0b0001, {0,1,?,?}, "16">;
934 def VLD4q32  : VLD4D<0b0001, {1,0,?,?}, "32">;
935 def VLD4q8_UPD  : VLD4DWB<0b0001, {0,0,?,?}, "8">;
936 def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">;
937 def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">;
939 def VLD4q8Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>;
940 def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
941 def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
943 // ...alternate versions to be allocated odd register numbers:
944 def VLD4q8oddPseudo   : VLDQQQQPseudo<IIC_VLD4>;
945 def VLD4q16oddPseudo  : VLDQQQQPseudo<IIC_VLD4>;
946 def VLD4q32oddPseudo  : VLDQQQQPseudo<IIC_VLD4>;
948 def VLD4q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>;
949 def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
950 def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
952 } // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
954 // Classes for VLD*LN pseudo-instructions with multi-register operands.
955 // These are expanded to real instructions after register allocation.
// Naming: Q = one Q register, QQ = register quad (2 Q regs), QQQQ = 4 Q regs;
// the WB variants add a GPR writeback result and an am6offset input.
956 class VLDQLNPseudo<InstrItinClass itin>
957   : PseudoNLdSt<(outs QPR:$dst),
958                 (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
959                 itin, "$src = $dst">;
960 class VLDQLNWBPseudo<InstrItinClass itin>
961   : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
962                 (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
963                  nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
964 class VLDQQLNPseudo<InstrItinClass itin>
965   : PseudoNLdSt<(outs QQPR:$dst),
966                 (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
967                 itin, "$src = $dst">;
968 class VLDQQLNWBPseudo<InstrItinClass itin>
969   : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
970                 (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
971                  nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
972 class VLDQQQQLNPseudo<InstrItinClass itin>
973   : PseudoNLdSt<(outs QQQQPR:$dst),
974                 (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
975                 itin, "$src = $dst">;
976 class VLDQQQQLNWBPseudo<InstrItinClass itin>
977   : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
978                 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
979                  nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
981 // VLD1LN : Vector Load (single element to one lane)
// Inserts a single loaded element into lane $lane of $Vd, preserving the
// other lanes via the "$src = $Vd" tie (constraint text elided from view).
982 class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
984   : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
985           (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane),
986           IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
988           [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
989                                          (i32 (LoadOp addrmode6:$Rn)),
992   let DecoderMethod = "DecodeVLD1LN";
// 32-bit variant uses addrmode6oneL32, which differs from addrmode6 only in
// the addressing-mode operand class for the single-32-bit-element case.
994 class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
996   : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
997           (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane),
998           IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
1000           [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
1001                                          (i32 (LoadOp addrmode6oneL32:$Rn)),
1004   let DecoderMethod = "DecodeVLD1LN";
1006 class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln> {
1007   let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src),
1008                                                (i32 (LoadOp addrmode6:$addr)),
// Concrete lane forms: lane bits land in different Inst fields per size.
1012 def VLD1LNd8  : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> {
1013   let Inst{7-5} = lane{2-0};
1015 def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> {
1016   let Inst{7-6} = lane{1-0};
1017   let Inst{5-4} = Rn{5-4};
1019 def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> {
1020   let Inst{7} = lane{0};
1021   let Inst{5-4} = Rn{5-4};
1024 def VLD1LNq8Pseudo  : VLD1QLNPseudo<v16i8, extloadi8>;
1025 def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>;
1026 def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>;
// f32 lane inserts reuse the 32-bit integer lane-load instructions.
1028 def : Pat<(vector_insert (v2f32 DPR:$src),
1029                          (f32 (load addrmode6:$addr)), imm:$lane),
1030           (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
1031 def : Pat<(vector_insert (v4f32 QPR:$src),
1032                          (f32 (load addrmode6:$addr)), imm:$lane),
1033           (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
1035 let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
1037 // ...with address register writeback:
1038 class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1039   : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb),
1040           (ins addrmode6:$Rn, am6offset:$Rm,
1041            DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt,
1042           "\\{$Vd[$lane]\\}, $Rn$Rm",
1043           "$src = $Vd, $Rn.addr = $wb", []> {
1044   let DecoderMethod = "DecodeVLD1LN";
// Writeback lane forms; lane/Rn bit placement matches the non-WB versions.
1047 def VLD1LNd8_UPD  : VLD1LNWB<0b0000, {?,?,?,0}, "8"> {
1048   let Inst{7-5} = lane{2-0};
1050 def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> {
1051   let Inst{7-6} = lane{1-0};
1052   let Inst{4}   = Rn{4};
1054 def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> {
1055   let Inst{7} = lane{0};
1056   let Inst{5} = Rn{4};
1057   let Inst{4} = Rn{4};
1060 def VLD1LNq8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD1lnu>;
1061 def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
1062 def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
1064 // VLD2LN : Vector Load (single 2-element structure to one lane)
// Loads one 2-element structure into the same lane of two D registers;
// both destinations are tied to sources to preserve the other lanes.
1065 class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
1066   : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
1067           (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane),
1068           IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn",
1069           "$src1 = $Vd, $src2 = $dst2", []> {
1071   let Inst{4}   = Rn{4};
1072   let DecoderMethod = "DecodeVLD2LN";
1075 def VLD2LNd8  : VLD2LN<0b0001, {?,?,?,?}, "8"> {
1076   let Inst{7-5} = lane{2-0};
1078 def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> {
1079   let Inst{7-6} = lane{1-0};
1081 def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> {
1082   let Inst{7} = lane{0};
1085 def VLD2LNd8Pseudo  : VLDQLNPseudo<IIC_VLD2ln>;
1086 def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
1087 def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
1089 // ...with double-spaced registers:
1090 def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> {
1091   let Inst{7-6} = lane{1-0};
1093 def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> {
1094   let Inst{7} = lane{0};
1097 def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
1098 def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
1100 // ...with address register writeback:
1101 class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1102   : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
1103           (ins addrmode6:$Rn, am6offset:$Rm,
1104            DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt,
1105           "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm",
1106           "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> {
1107   let Inst{4}   = Rn{4};
1108   let DecoderMethod = "DecodeVLD2LN";
1111 def VLD2LNd8_UPD  : VLD2LNWB<0b0001, {?,?,?,?}, "8"> {
1112   let Inst{7-5} = lane{2-0};
1114 def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> {
1115   let Inst{7-6} = lane{1-0};
1117 def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> {
1118   let Inst{7} = lane{0};
1121 def VLD2LNd8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD2lnu>;
1122 def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
1123 def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
1125 def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> {
1126   let Inst{7-6} = lane{1-0};
1128 def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> {
1129   let Inst{7} = lane{0};
1132 def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
1133 def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
1135 // VLD3LN : Vector Load (single 3-element structure to one lane)
// Same tied-source scheme as VLD2LN, extended to three D registers.
1136 class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
1137   : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
1138           (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3,
1139            nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt,
1140           "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
1141           "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []> {
1143   let DecoderMethod = "DecodeVLD3LN";
1146 def VLD3LNd8  : VLD3LN<0b0010, {?,?,?,0}, "8"> {
1147   let Inst{7-5} = lane{2-0};
1149 def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> {
1150   let Inst{7-6} = lane{1-0};
1152 def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> {
1153   let Inst{7}   = lane{0};
1156 def VLD3LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD3ln>;
1157 def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
1158 def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
1160 // ...with double-spaced registers:
1161 def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> {
1162   let Inst{7-6} = lane{1-0};
1164 def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> {
1165   let Inst{7}   = lane{0};
1168 def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
1169 def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
1171 // ...with address register writeback:
1172 class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1173   : NLdStLn<1, 0b10, op11_8, op7_4,
1174           (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
1175           (ins addrmode6:$Rn, am6offset:$Rm,
1176            DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
1177           IIC_VLD3lnu, "vld3", Dt,
1178           "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
1179           "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
1181   let DecoderMethod = "DecodeVLD3LN";
1184 def VLD3LNd8_UPD  : VLD3LNWB<0b0010, {?,?,?,0}, "8"> {
1185   let Inst{7-5} = lane{2-0};
1187 def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> {
1188   let Inst{7-6} = lane{1-0};
1190 def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> {
1191   let Inst{7}   = lane{0};
1194 def VLD3LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD3lnu>;
1195 def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
1196 def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
1198 def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> {
1199   let Inst{7-6} = lane{1-0};
1201 def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> {
1202   let Inst{7}   = lane{0};
1205 def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
1206 def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
1208 // VLD4LN : Vector Load (single 4-element structure to one lane)
// Four tied destination/source D registers; the 32-bit forms additionally
// route Rn{5} into Inst{5} (index-alignment encoding bit).
1209 class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
1210   : NLdStLn<1, 0b10, op11_8, op7_4,
1211           (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
1212           (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
1213            nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt,
1214           "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
1215           "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> {
1217   let Inst{4}   = Rn{4};
1218   let DecoderMethod = "DecodeVLD4LN";
1221 def VLD4LNd8  : VLD4LN<0b0011, {?,?,?,?}, "8"> {
1222   let Inst{7-5} = lane{2-0};
1224 def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> {
1225   let Inst{7-6} = lane{1-0};
1227 def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> {
1228   let Inst{7} = lane{0};
1229   let Inst{5} = Rn{5};
1232 def VLD4LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD4ln>;
1233 def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
1234 def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
1236 // ...with double-spaced registers:
1237 def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> {
1238   let Inst{7-6} = lane{1-0};
1240 def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
1241   let Inst{7} = lane{0};
1242   let Inst{5} = Rn{5};
1245 def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
1246 def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
1248 // ...with address register writeback:
// Writeback form of VLD4LN: adds a GPR:$wb result tied to $Rn.addr and an
// am6offset post-increment input, otherwise identical to VLD4LN above.
1249 class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1250   : NLdStLn<1, 0b10, op11_8, op7_4,
1251           (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
1252           (ins addrmode6:$Rn, am6offset:$Rm,
1253            DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
1254           IIC_VLD4lnu, "vld4", Dt,
1255 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm",
1256 "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb",
1258   let Inst{4}   = Rn{4};
1259   let DecoderMethod = "DecodeVLD4LN";
1262 def VLD4LNd8_UPD  : VLD4LNWB<0b0011, {?,?,?,?}, "8"> {
1263   let Inst{7-5} = lane{2-0};
1265 def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> {
1266   let Inst{7-6} = lane{1-0};
1268 def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
1269   let Inst{7} = lane{0};
1270   let Inst{5} = Rn{5};
1273 def VLD4LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD4lnu>;
1274 def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
1275 def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
1277 def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> {
1278   let Inst{7-6} = lane{1-0};
1280 def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
1281   let Inst{7} = lane{0};
1282   let Inst{5} = Rn{5};
1285 def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
1286 def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
1288 } // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
1290 // VLD1DUP : Vector Load (single element to all lanes)
// Loads one element and duplicates it across all lanes of the destination
// (NEONvdup in the selection pattern).
1291 class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
1292   : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd),
1293           (ins addrmode6dup:$Rn),
1294           IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "",
1295           [(set VecListOneDAllLanes:$Vd,
1296                 (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
1298   let Inst{4} = Rn{4};
1299   let DecoderMethod = "DecodeVLD1DupInstruction";
1301 def VLD1DUPd8  : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8>;
1302 def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16>;
1303 def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load>;
// f32 dup reuses the 32-bit integer dup-load instruction.
1305 def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
1306           (VLD1DUPd32 addrmode6:$addr)>;
// Q-register (D-pair) variant of the same dup load.
1308 class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
1309   : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd),
1310           (ins addrmode6dup:$Rn), IIC_VLD1dup,
1311           "vld1", Dt, "$Vd, $Rn", "",
1312           [(set VecListDPairAllLanes:$Vd,
1313                 (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
1315   let Inst{4} = Rn{4};
1316   let DecoderMethod = "DecodeVLD1DupInstruction";
1319 def VLD1DUPq8  : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8>;
1320 def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16>;
1321 def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load>;
1323 def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
1324           (VLD1DUPq32 addrmode6:$addr)>;
1326 let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
1327 // ...with address register writeback:
// _fixed: post-increment by access size ("$Rn!"); _register: by rGPR:$Rm.
1328 multiclass VLD1DUPWB<bits<4> op7_4, string Dt> {
1329   def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
1330                      (outs VecListOneDAllLanes:$Vd, GPR:$wb),
1331                      (ins addrmode6dup:$Rn), IIC_VLD1dupu,
1332                      "vld1", Dt, "$Vd, $Rn!",
1333                      "$Rn.addr = $wb", []> {
1334     let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1335     let Inst{4} = Rn{4};
1336     let DecoderMethod = "DecodeVLD1DupInstruction";
1337     let AsmMatchConverter = "cvtVLDwbFixed";
1339   def _register : NLdSt<1, 0b10, 0b1100, op7_4,
1340                         (outs VecListOneDAllLanes:$Vd, GPR:$wb),
1341                         (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu,
1342                         "vld1", Dt, "$Vd, $Rn, $Rm",
1343                         "$Rn.addr = $wb", []> {
1344     let Inst{4} = Rn{4};
1345     let DecoderMethod = "DecodeVLD1DupInstruction";
1346     let AsmMatchConverter = "cvtVLDwbRegister";
// Same pair of writeback variants for the D-pair destination list.
1349 multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> {
1350   def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
1351                      (outs VecListDPairAllLanes:$Vd, GPR:$wb),
1352                      (ins addrmode6dup:$Rn), IIC_VLD1dupu,
1353                      "vld1", Dt, "$Vd, $Rn!",
1354                      "$Rn.addr = $wb", []> {
1355     let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1356     let Inst{4} = Rn{4};
1357     let DecoderMethod = "DecodeVLD1DupInstruction";
1358     let AsmMatchConverter = "cvtVLDwbFixed";
1360   def _register : NLdSt<1, 0b10, 0b1100, op7_4,
1361                         (outs VecListDPairAllLanes:$Vd, GPR:$wb),
1362                         (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu,
1363                         "vld1", Dt, "$Vd, $Rn, $Rm",
1364                         "$Rn.addr = $wb", []> {
1365     let Inst{4} = Rn{4};
1366     let DecoderMethod = "DecodeVLD1DupInstruction";
1367     let AsmMatchConverter = "cvtVLDwbRegister";
1371 defm VLD1DUPd8wb  : VLD1DUPWB<{0,0,0,0}, "8">;
1372 defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16">;
1373 defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32">;
1375 defm VLD1DUPq8wb  : VLD1QDUPWB<{0,0,1,0}, "8">;
1376 defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16">;
1377 defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32">;
1379 // VLD2DUP : Vector Load (single 2-element structure to all lanes)
// VdTy selects adjacent (VecListDPairAllLanes) or double-spaced
// (VecListDPairSpacedAllLanes) destination register pairs.
1380 class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy>
1381   : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd),
1382           (ins addrmode6dup:$Rn), IIC_VLD2dup,
1383           "vld2", Dt, "$Vd, $Rn", "", []> {
1385   let Inst{4} = Rn{4};
1386   let DecoderMethod = "DecodeVLD2DupInstruction";
1389 def VLD2DUPd8  : VLD2DUP<{0,0,0,?}, "8",  VecListDPairAllLanes>;
1390 def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes>;
1391 def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes>;
1393 // ...with double-spaced registers
1394 def VLD2DUPd8x2  : VLD2DUP<{0,0,1,?}, "8",  VecListDPairSpacedAllLanes>;
1395 def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>;
1396 def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>;
1398 // ...with address register writeback:
1399 multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> {
1400   def _fixed : NLdSt<1, 0b10, 0b1101, op7_4,
1401                      (outs VdTy:$Vd, GPR:$wb),
1402                      (ins addrmode6dup:$Rn), IIC_VLD2dupu,
1403                      "vld2", Dt, "$Vd, $Rn!",
1404                      "$Rn.addr = $wb", []> {
1405     let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1406     let Inst{4} = Rn{4};
1407     let DecoderMethod = "DecodeVLD2DupInstruction";
1408     let AsmMatchConverter = "cvtVLDwbFixed";
1410   def _register : NLdSt<1, 0b10, 0b1101, op7_4,
1411                         (outs VdTy:$Vd, GPR:$wb),
1412                         (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD2dupu,
1413                         "vld2", Dt, "$Vd, $Rn, $Rm",
1414                         "$Rn.addr = $wb", []> {
1415     let Inst{4} = Rn{4};
1416     let DecoderMethod = "DecodeVLD2DupInstruction";
1417     let AsmMatchConverter = "cvtVLDwbRegister";
1421 defm VLD2DUPd8wb    : VLD2DUPWB<{0,0,0,0}, "8",  VecListDPairAllLanes>;
1422 defm VLD2DUPd16wb   : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes>;
1423 defm VLD2DUPd32wb   : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes>;
1425 defm VLD2DUPd8x2wb  : VLD2DUPWB<{0,0,1,0}, "8",  VecListDPairSpacedAllLanes>;
1426 defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>;
1427 defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>;
1429 // VLD3DUP : Vector Load (single 3-element structure to all lanes)
// Three D-register destinations, each filled with one duplicated element.
1430 class VLD3DUP<bits<4> op7_4, string Dt>
1431   : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
1432           (ins addrmode6dup:$Rn), IIC_VLD3dup,
1433           "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []> {
1436   let DecoderMethod = "DecodeVLD3DupInstruction";
1439 def VLD3DUPd8  : VLD3DUP<{0,0,0,?}, "8">;
1440 def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
1441 def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;
1443 def VLD3DUPd8Pseudo  : VLDQQPseudo<IIC_VLD3dup>;
1444 def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>;
1445 def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>;
1447 // ...with double-spaced registers (not used for codegen):
1448 def VLD3DUPq8  : VLD3DUP<{0,0,1,?}, "8">;
1449 def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">;
1450 def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">;
1452 // ...with address register writeback:
1453 class VLD3DUPWB<bits<4> op7_4, string Dt>
1454   : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
1455           (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD3dupu,
1456           "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
1457           "$Rn.addr = $wb", []> {
1459   let DecoderMethod = "DecodeVLD3DupInstruction";
1462 def VLD3DUPd8_UPD  : VLD3DUPWB<{0,0,0,0}, "8">;
1463 def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16">;
1464 def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32">;
1466 def VLD3DUPq8_UPD  : VLD3DUPWB<{0,0,1,0}, "8">;
1467 def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16">;
1468 def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32">;
1470 def VLD3DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3dupu>;
1471 def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
1472 def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
1474 // VLD4DUP : Vector Load (single 4-element structure to all lanes)
// Four D-register destinations; the 32-bit forms route Rn{5} into Inst{6}
// (done per-def via the trailing "let Inst{6} = Rn{5};" blocks below).
1475 class VLD4DUP<bits<4> op7_4, string Dt>
1476   : NLdSt<1, 0b10, 0b1111, op7_4,
1477           (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
1478           (ins addrmode6dup:$Rn), IIC_VLD4dup,
1479           "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> {
1481   let Inst{4} = Rn{4};
1482   let DecoderMethod = "DecodeVLD4DupInstruction";
1485 def VLD4DUPd8  : VLD4DUP<{0,0,0,?}, "8">;
1486 def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">;
1487 def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
1489 def VLD4DUPd8Pseudo  : VLDQQPseudo<IIC_VLD4dup>;
1490 def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>;
1491 def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>;
1493 // ...with double-spaced registers (not used for codegen):
1494 def VLD4DUPq8  : VLD4DUP<{0,0,1,?}, "8">;
1495 def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">;
1496 def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
1498 // ...with address register writeback:
1499 class VLD4DUPWB<bits<4> op7_4, string Dt>
1500   : NLdSt<1, 0b10, 0b1111, op7_4,
1501           (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
1502           (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu,
1503           "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
1504           "$Rn.addr = $wb", []> {
1505   let Inst{4} = Rn{4};
1506   let DecoderMethod = "DecodeVLD4DupInstruction";
1509 def VLD4DUPd8_UPD  : VLD4DUPWB<{0,0,0,0}, "8">;
1510 def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">;
1511 def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
1513 def VLD4DUPq8_UPD  : VLD4DUPWB<{0,0,1,0}, "8">;
1514 def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
1515 def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
1517 def VLD4DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4dupu>;
1518 def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
1519 def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
1521 } // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
1523 let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
1525 // Classes for VST* pseudo-instructions with multi-register operands.
1526 // These are expanded to real instructions after register allocation.
// Store-side mirror of the VLD* pseudos: source registers are inputs, the
// only result (in WB variants) is the updated base address GPR.
1527 class VSTQPseudo<InstrItinClass itin>
1528   : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">;
1529 class VSTQWBPseudo<InstrItinClass itin>
1530   : PseudoNLdSt<(outs GPR:$wb),
1531                 (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin,
1532                 "$addr.addr = $wb">;
1533 class VSTQWBfixedPseudo<InstrItinClass itin>
1534   : PseudoNLdSt<(outs GPR:$wb),
1535                 (ins addrmode6:$addr, QPR:$src), itin,
1536                 "$addr.addr = $wb">;
1537 class VSTQWBregisterPseudo<InstrItinClass itin>
1538   : PseudoNLdSt<(outs GPR:$wb),
1539                 (ins addrmode6:$addr, rGPR:$offset, QPR:$src), itin,
1540                 "$addr.addr = $wb">;
1541 class VSTQQPseudo<InstrItinClass itin>
1542   : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">;
1543 class VSTQQWBPseudo<InstrItinClass itin>
1544   : PseudoNLdSt<(outs GPR:$wb),
1545                 (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin,
1546                 "$addr.addr = $wb">;
1547 class VSTQQWBfixedPseudo<InstrItinClass itin>
1548   : PseudoNLdSt<(outs GPR:$wb),
1549                 (ins addrmode6:$addr, QQPR:$src), itin,
1550                 "$addr.addr = $wb">;
1551 class VSTQQWBregisterPseudo<InstrItinClass itin>
1552   : PseudoNLdSt<(outs GPR:$wb),
1553                 (ins addrmode6:$addr, rGPR:$offset, QQPR:$src), itin,
1554                 "$addr.addr = $wb">;
1556 class VSTQQQQPseudo<InstrItinClass itin>
1557   : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">;
1558 class VSTQQQQWBPseudo<InstrItinClass itin>
1559   : PseudoNLdSt<(outs GPR:$wb),
1560                 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
1561                 "$addr.addr = $wb">;
1563 // VST1 : Vector Store (multiple single elements)
// VST1D stores one D register (VecListOneD); VST1Q stores a D pair.
1564 class VST1D<bits<4> op7_4, string Dt>
1565   : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$Rn, VecListOneD:$Vd),
1566           IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> {
1568   let Inst{4} = Rn{4};
1569   let DecoderMethod = "DecodeVSTInstruction";
1571 class VST1Q<bits<4> op7_4, string Dt>
1572   : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$Rn, VecListDPair:$Vd),
1573           IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> {
1575   let Inst{5-4} = Rn{5-4};
1576   let DecoderMethod = "DecodeVSTInstruction";
1579 def  VST1d8   : VST1D<{0,0,0,?}, "8">;
1580 def  VST1d16  : VST1D<{0,1,0,?}, "16">;
1581 def  VST1d32  : VST1D<{1,0,0,?}, "32">;
1582 def  VST1d64  : VST1D<{1,1,0,?}, "64">;
1584 def  VST1q8   : VST1Q<{0,0,?,?}, "8">;
1585 def  VST1q16  : VST1Q<{0,1,?,?}, "16">;
1586 def  VST1q32  : VST1Q<{1,0,?,?}, "32">;
1587 def  VST1q64  : VST1Q<{1,1,?,?}, "64">;
1589 // ...with address register writeback:
// NOTE(review): the writeback variants use IIC_VLD1u/IIC_VLD1x2u (load
// itineraries) rather than VST ones — looks intentional upstream; confirm.
1590 multiclass VST1DWB<bits<4> op7_4, string Dt> {
1591   def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb),
1592                      (ins addrmode6:$Rn, VecListOneD:$Vd), IIC_VLD1u,
1593                      "vst1", Dt, "$Vd, $Rn!",
1594                      "$Rn.addr = $wb", []> {
1595     let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1596     let Inst{4} = Rn{4};
1597     let DecoderMethod = "DecodeVSTInstruction";
1598     let AsmMatchConverter = "cvtVSTwbFixed";
1600   def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb),
1601                         (ins addrmode6:$Rn, rGPR:$Rm, VecListOneD:$Vd),
1603                         "vst1", Dt, "$Vd, $Rn, $Rm",
1604                         "$Rn.addr = $wb", []> {
1605     let Inst{4} = Rn{4};
1606     let DecoderMethod = "DecodeVSTInstruction";
1607     let AsmMatchConverter = "cvtVSTwbRegister";
1610 multiclass VST1QWB<bits<4> op7_4, string Dt> {
1611   def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
1612                      (ins addrmode6:$Rn, VecListDPair:$Vd), IIC_VLD1x2u,
1613                      "vst1", Dt, "$Vd, $Rn!",
1614                      "$Rn.addr = $wb", []> {
1615     let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1616     let Inst{5-4} = Rn{5-4};
1617     let DecoderMethod = "DecodeVSTInstruction";
1618     let AsmMatchConverter = "cvtVSTwbFixed";
1620   def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
1621                         (ins addrmode6:$Rn, rGPR:$Rm, VecListDPair:$Vd),
1623                         "vst1", Dt, "$Vd, $Rn, $Rm",
1624                         "$Rn.addr = $wb", []> {
1625     let Inst{5-4} = Rn{5-4};
1626     let DecoderMethod = "DecodeVSTInstruction";
1627     let AsmMatchConverter = "cvtVSTwbRegister";
1631 defm VST1d8wb  : VST1DWB<{0,0,0,?}, "8">;
1632 defm VST1d16wb : VST1DWB<{0,1,0,?}, "16">;
1633 defm VST1d32wb : VST1DWB<{1,0,0,?}, "32">;
1634 defm VST1d64wb : VST1DWB<{1,1,0,?}, "64">;
1636 defm VST1q8wb  : VST1QWB<{0,0,?,?}, "8">;
1637 defm VST1q16wb : VST1QWB<{0,1,?,?}, "16">;
1638 defm VST1q32wb : VST1QWB<{1,0,?,?}, "32">;
1639 defm VST1q64wb : VST1QWB<{1,1,?,?}, "64">;
1641 // ...with 3 registers
// vst1 with a three-D-register list (VecListThreeD).
1642 class VST1D3<bits<4> op7_4, string Dt>
1643   : NLdSt<0, 0b00, 0b0110, op7_4, (outs),
1644           (ins addrmode6:$Rn, VecListThreeD:$Vd),
1645           IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> {
1647   let Inst{4} = Rn{4};
1648   let DecoderMethod = "DecodeVSTInstruction";
1650 multiclass VST1D3WB<bits<4> op7_4, string Dt> {
1651   def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
1652                      (ins addrmode6:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u,
1653                      "vst1", Dt, "$Vd, $Rn!",
1654                      "$Rn.addr = $wb", []> {
1655     let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1656     let Inst{5-4} = Rn{5-4};
1657     let DecoderMethod = "DecodeVSTInstruction";
1658     let AsmMatchConverter = "cvtVSTwbFixed";
1660   def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
1661                         (ins addrmode6:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
1663                         "vst1", Dt, "$Vd, $Rn, $Rm",
1664                         "$Rn.addr = $wb", []> {
1665     let Inst{5-4} = Rn{5-4};
1666     let DecoderMethod = "DecodeVSTInstruction";
1667     let AsmMatchConverter = "cvtVSTwbRegister";
1671 def VST1d8T  : VST1D3<{0,0,0,?}, "8">;
1672 def VST1d16T : VST1D3<{0,1,0,?}, "16">;
1673 def VST1d32T : VST1D3<{1,0,0,?}, "32">;
1674 def VST1d64T : VST1D3<{1,1,0,?}, "64">;
1676 defm VST1d8Twb  : VST1D3WB<{0,0,0,?}, "8">;
1677 defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16">;
1678 defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32">;
1679 defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64">;
1681 def VST1d64TPseudo            : VSTQQPseudo<IIC_VST1x3>;
1682 def VST1d64TPseudoWB_fixed    : VSTQQWBPseudo<IIC_VST1x3u>;
1683 def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>;
1685 // ...with 4 registers
// VST1 storing a list of four consecutive D registers; mirrors the
// three-register section above (base class, write-back multiclass, defs,
// and QQ pseudos for the 64-bit form).
1686 class VST1D4<bits<4> op7_4, string Dt>
1687 : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
1688 (ins addrmode6:$Rn, VecListFourD:$Vd),
1689 IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "",
1692 let Inst{5-4} = Rn{5-4};
1693 let DecoderMethod = "DecodeVSTInstruction";
// NOTE(review): IIC_VLD1x4u (load itinerary) on a store -- confirm intended.
1695 multiclass VST1D4WB<bits<4> op7_4, string Dt> {
1696 def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
1697 (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1x4u,
1698 "vst1", Dt, "$Vd, $Rn!",
1699 "$Rn.addr = $wb", []> {
1700 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1701 let Inst{5-4} = Rn{5-4};
1702 let DecoderMethod = "DecodeVSTInstruction";
1703 let AsmMatchConverter = "cvtVSTwbFixed";
1705 def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
1706 (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd),
1708 "vst1", Dt, "$Vd, $Rn, $Rm",
1709 "$Rn.addr = $wb", []> {
1710 let Inst{5-4} = Rn{5-4};
1711 let DecoderMethod = "DecodeVSTInstruction";
1712 let AsmMatchConverter = "cvtVSTwbRegister";
1716 def VST1d8Q : VST1D4<{0,0,?,?}, "8">;
1717 def VST1d16Q : VST1D4<{0,1,?,?}, "16">;
1718 def VST1d32Q : VST1D4<{1,0,?,?}, "32">;
1719 def VST1d64Q : VST1D4<{1,1,?,?}, "64">;
1721 defm VST1d8Qwb : VST1D4WB<{0,0,?,?}, "8">;
1722 defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16">;
1723 defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32">;
1724 defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64">;
1726 def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>;
1727 def VST1d64QPseudoWB_fixed : VSTQQWBPseudo<IIC_VST1x4u>;
1728 def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>;
1730 // VST2 : Vector Store (multiple 2-element structures)
// Base class: the register-list operand type (VdTy) distinguishes the
// d-register-pair form from the four-register (q) form; op11_8 selects the
// instruction variant, op7_4 the element size.
1731 class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
1732 InstrItinClass itin>
1733 : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, VdTy:$Vd),
1734 itin, "vst2", Dt, "$Vd, $Rn", "", []> {
1736 let Inst{5-4} = Rn{5-4};
1737 let DecoderMethod = "DecodeVSTInstruction";
1740 def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2>;
1741 def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2>;
1742 def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2>;
1744 def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2>;
1745 def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2>;
1746 def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2>;
1748 def VST2q8Pseudo : VSTQQPseudo<IIC_VST2x2>;
1749 def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>;
1750 def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>;
1752 // ...with address register writeback:
// NOTE(review): both write-back multiclasses below use IIC_VLD1u (a load
// itinerary) for store instructions -- confirm against the schedule model.
1753 multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
1754 RegisterOperand VdTy> {
1755 def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
1756 (ins addrmode6:$Rn, VdTy:$Vd), IIC_VLD1u,
1757 "vst2", Dt, "$Vd, $Rn!",
1758 "$Rn.addr = $wb", []> {
1759 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1760 let Inst{5-4} = Rn{5-4};
1761 let DecoderMethod = "DecodeVSTInstruction";
1762 let AsmMatchConverter = "cvtVSTwbFixed";
1764 def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
1765 (ins addrmode6:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u,
1766 "vst2", Dt, "$Vd, $Rn, $Rm",
1767 "$Rn.addr = $wb", []> {
1768 let Inst{5-4} = Rn{5-4};
1769 let DecoderMethod = "DecodeVSTInstruction";
1770 let AsmMatchConverter = "cvtVSTwbRegister";
1773 multiclass VST2QWB<bits<4> op7_4, string Dt> {
1774 def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
1775 (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1u,
1776 "vst2", Dt, "$Vd, $Rn!",
1777 "$Rn.addr = $wb", []> {
1778 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1779 let Inst{5-4} = Rn{5-4};
1780 let DecoderMethod = "DecodeVSTInstruction";
1781 let AsmMatchConverter = "cvtVSTwbFixed";
1783 def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
1784 (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd),
1786 "vst2", Dt, "$Vd, $Rn, $Rm",
1787 "$Rn.addr = $wb", []> {
1788 let Inst{5-4} = Rn{5-4};
1789 let DecoderMethod = "DecodeVSTInstruction";
1790 let AsmMatchConverter = "cvtVSTwbRegister";
1794 defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListDPair>;
1795 defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair>;
1796 defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair>;
1798 defm VST2q8wb : VST2QWB<{0,0,?,?}, "8">;
1799 defm VST2q16wb : VST2QWB<{0,1,?,?}, "16">;
1800 defm VST2q32wb : VST2QWB<{1,0,?,?}, "32">;
1802 def VST2q8PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>;
1803 def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>;
1804 def VST2q32PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>;
1805 def VST2q8PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
1806 def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
1807 def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
1809 // ...with double-spaced registers
// op11_8 = 0b1001 selects the VST2 form whose two D registers are two
// apart (e.g. {d0, d2}); VecListDPairSpaced is the matching operand.
1810 def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VST2>;
1811 def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2>;
1812 def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2>;
1813 defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced>;
1814 defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced>;
1815 defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced>;
1817 // VST3 : Vector Store (multiple 3-element structures)
// Three explicit DPR source operands; the assembly register list is built
// in the asm string ("{$Vd, $src2, $src3}").
1818 class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
1819 : NLdSt<0, 0b00, op11_8, op7_4, (outs),
1820 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3,
1821 "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
1823 let Inst{4} = Rn{4};
1824 let DecoderMethod = "DecodeVSTInstruction";
1827 def VST3d8 : VST3D<0b0100, {0,0,0,?}, "8">;
1828 def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">;
1829 def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">;
1831 def VST3d8Pseudo : VSTQQPseudo<IIC_VST3>;
1832 def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>;
1833 def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>;
1835 // ...with address register writeback:
// Unlike VST1/VST2 above, the write-back form here is a single class taking
// an am6offset $Rm operand (fixed/register selection is in the operand).
1836 class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1837 : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
1838 (ins addrmode6:$Rn, am6offset:$Rm,
1839 DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u,
1840 "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
1841 "$Rn.addr = $wb", []> {
1842 let Inst{4} = Rn{4};
1843 let DecoderMethod = "DecodeVSTInstruction";
1846 def VST3d8_UPD : VST3DWB<0b0100, {0,0,0,?}, "8">;
1847 def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">;
1848 def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">;
1850 def VST3d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
1851 def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
1852 def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
1854 // ...with double-spaced registers:
1855 def VST3q8 : VST3D<0b0101, {0,0,0,?}, "8">;
1856 def VST3q16 : VST3D<0b0101, {0,1,0,?}, "16">;
1857 def VST3q32 : VST3D<0b0101, {1,0,0,?}, "32">;
1858 def VST3q8_UPD : VST3DWB<0b0101, {0,0,0,?}, "8">;
1859 def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">;
1860 def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">;
1862 def VST3q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
1863 def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
1864 def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
1866 // ...alternate versions to be allocated odd register numbers:
1867 def VST3q8oddPseudo : VSTQQQQPseudo<IIC_VST3>;
1868 def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>;
1869 def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>;
1871 def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
1872 def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
1873 def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
1875 // VST4 : Vector Store (multiple 4-element structures)
// Structured exactly like the VST3 section above, with a fourth DPR source.
1876 class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
1877 : NLdSt<0, 0b00, op11_8, op7_4, (outs),
1878 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
1879 IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
1882 let Inst{5-4} = Rn{5-4};
1883 let DecoderMethod = "DecodeVSTInstruction";
1886 def VST4d8 : VST4D<0b0000, {0,0,?,?}, "8">;
1887 def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">;
1888 def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">;
1890 def VST4d8Pseudo : VSTQQPseudo<IIC_VST4>;
1891 def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>;
1892 def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>;
1894 // ...with address register writeback:
1895 class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1896 : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
1897 (ins addrmode6:$Rn, am6offset:$Rm,
1898 DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u,
1899 "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
1900 "$Rn.addr = $wb", []> {
1901 let Inst{5-4} = Rn{5-4};
1902 let DecoderMethod = "DecodeVSTInstruction";
1905 def VST4d8_UPD : VST4DWB<0b0000, {0,0,?,?}, "8">;
1906 def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">;
1907 def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">;
1909 def VST4d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
1910 def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
1911 def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
1913 // ...with double-spaced registers:
1914 def VST4q8 : VST4D<0b0001, {0,0,?,?}, "8">;
1915 def VST4q16 : VST4D<0b0001, {0,1,?,?}, "16">;
1916 def VST4q32 : VST4D<0b0001, {1,0,?,?}, "32">;
1917 def VST4q8_UPD : VST4DWB<0b0001, {0,0,?,?}, "8">;
1918 def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">;
1919 def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">;
1921 def VST4q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
1922 def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
1923 def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
1925 // ...alternate versions to be allocated odd register numbers:
1926 def VST4q8oddPseudo : VSTQQQQPseudo<IIC_VST4>;
1927 def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>;
1928 def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>;
1930 def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
1931 def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
1932 def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
1934 } // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
1936 // Classes for VST*LN pseudo-instructions with multi-register operands.
1937 // These are expanded to real instructions after register allocation.
// Three operand widths (Q, QQ, QQQQ register tuples), each with a plain
// and a write-back ("WB", extra am6offset + GPR:$wb) variant. The lane
// index is a plain integer operand (nohash_imm).
1938 class VSTQLNPseudo<InstrItinClass itin>
1939 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
1941 class VSTQLNWBPseudo<InstrItinClass itin>
1942 : PseudoNLdSt<(outs GPR:$wb),
1943 (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
1944 nohash_imm:$lane), itin, "$addr.addr = $wb">;
1945 class VSTQQLNPseudo<InstrItinClass itin>
1946 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
1948 class VSTQQLNWBPseudo<InstrItinClass itin>
1949 : PseudoNLdSt<(outs GPR:$wb),
1950 (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
1951 nohash_imm:$lane), itin, "$addr.addr = $wb">;
1952 class VSTQQQQLNPseudo<InstrItinClass itin>
1953 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
1955 class VSTQQQQLNWBPseudo<InstrItinClass itin>
1956 : PseudoNLdSt<(outs GPR:$wb),
1957 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
1958 nohash_imm:$lane), itin, "$addr.addr = $wb">;
1960 // VST1LN : Vector Store (single element from one lane)
// Stores one element extracted from lane $lane of a D register. The
// selection pattern matches StoreOp(ExtractOp(Vd, lane), Rn) directly.
1961 class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
1962 PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode>
1963 : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
1964 (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane),
1965 IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
1966 [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]> {
1968 let DecoderMethod = "DecodeVST1LN";
// Q-register lane stores are pseudos; only the Pattern is specialized here.
1970 class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
1971 : VSTQLNPseudo<IIC_VST1ln> {
1972 let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
// Per-size defs: the lane index occupies progressively fewer Inst bits as
// the element widens (3 bits for i8, 2 for i16, 1 for i32).
1976 def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
1977 NEONvgetlaneu, addrmode6> {
1978 let Inst{7-5} = lane{2-0};
1980 def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
1981 NEONvgetlaneu, addrmode6> {
1982 let Inst{7-6} = lane{1-0};
1983 let Inst{4} = Rn{5};
1986 def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt,
1988 let Inst{7} = lane{0};
1989 let Inst{5-4} = Rn{5-4};
1992 def VST1LNq8Pseudo : VST1QLNPseudo<v16i8, truncstorei8, NEONvgetlaneu>;
1993 def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, NEONvgetlaneu>;
1994 def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;
// f32 lane stores reuse the i32 instructions via explicit patterns.
1996 def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
1997 (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
1998 def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
1999 (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
2001 // ...with address register writeback:
// Write-back form: am6offset $Rm, post-indexed store PatFrags
// (post_truncsti8/16, post_store), and the updated address in $wb.
2002 class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
2003 PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode>
2004 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
2005 (ins AdrMode:$Rn, am6offset:$Rm,
2006 DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
2007 "\\{$Vd[$lane]\\}, $Rn$Rm",
2009 [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
2010 AdrMode:$Rn, am6offset:$Rm))]> {
2011 let DecoderMethod = "DecodeVST1LN";
2013 class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
2014 : VSTQLNWBPseudo<IIC_VST1lnu> {
2015 let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
2016 addrmode6:$addr, am6offset:$offset))];
2019 def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
2020 NEONvgetlaneu, addrmode6> {
2021 let Inst{7-5} = lane{2-0};
2023 def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
2024 NEONvgetlaneu, addrmode6> {
2025 let Inst{7-6} = lane{1-0};
2026 let Inst{4} = Rn{5};
2028 def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
2029 extractelt, addrmode6oneL32> {
2030 let Inst{7} = lane{0};
2031 let Inst{5-4} = Rn{5-4};
2034 def VST1LNq8Pseudo_UPD : VST1QLNWBPseudo<v16i8, post_truncsti8, NEONvgetlaneu>;
2035 def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,NEONvgetlaneu>;
2036 def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;
// All lane stores below are pure stores: no side effects, extra source
// reg-alloc constraints for the multi-register lists.
2038 let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
2040 // VST2LN : Vector Store (single 2-element structure from one lane)
// Stores lane $lane of two D registers as one interleaved 2-element unit.
2041 class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
2042 : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
2043 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
2044 IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
2047 let Inst{4} = Rn{4};
2048 let DecoderMethod = "DecodeVST2LN";
2051 def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8"> {
2052 let Inst{7-5} = lane{2-0};
2054 def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> {
2055 let Inst{7-6} = lane{1-0};
2057 def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> {
2058 let Inst{7} = lane{0};
2061 def VST2LNd8Pseudo : VSTQLNPseudo<IIC_VST2ln>;
2062 def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>;
2063 def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>;
2065 // ...with double-spaced registers:
// The '1' in the op7_4 spacing bit selects the double-spaced register form;
// no 8-bit variant exists for double spacing.
2066 def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
2067 let Inst{7-6} = lane{1-0};
2068 let Inst{4} = Rn{4};
2070 def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
2071 let Inst{7} = lane{0};
2072 let Inst{4} = Rn{4};
2075 def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
2076 def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
2078 // ...with address register writeback:
2079 class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
2080 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
2081 (ins addrmode6:$Rn, am6offset:$Rm,
2082 DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
2083 "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm",
2084 "$Rn.addr = $wb", []> {
2085 let Inst{4} = Rn{4};
2086 let DecoderMethod = "DecodeVST2LN";
2089 def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8"> {
2090 let Inst{7-5} = lane{2-0};
2092 def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> {
2093 let Inst{7-6} = lane{1-0};
2095 def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
2096 let Inst{7} = lane{0};
2099 def VST2LNd8Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
2100 def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
2101 def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
2103 def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
2104 let Inst{7-6} = lane{1-0};
2106 def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
2107 let Inst{7} = lane{0};
2110 def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
2111 def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
2113 // VST3LN : Vector Store (single 3-element structure from one lane)
// Same shape as the VST2LN section, with a third DPR source register.
2114 class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
2115 : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
2116 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
2117 nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
2118 "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []> {
2120 let DecoderMethod = "DecodeVST3LN";
2123 def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8"> {
2124 let Inst{7-5} = lane{2-0};
2126 def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> {
2127 let Inst{7-6} = lane{1-0};
2129 def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> {
2130 let Inst{7} = lane{0};
2133 def VST3LNd8Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
2134 def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
2135 def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
2137 // ...with double-spaced registers:
2138 def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> {
2139 let Inst{7-6} = lane{1-0};
2141 def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> {
2142 let Inst{7} = lane{0};
2145 def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
2146 def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
2148 // ...with address register writeback:
2149 class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
2150 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
2151 (ins addrmode6:$Rn, am6offset:$Rm,
2152 DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane),
2153 IIC_VST3lnu, "vst3", Dt,
2154 "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm",
2155 "$Rn.addr = $wb", []> {
2156 let DecoderMethod = "DecodeVST3LN";
2159 def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8"> {
2160 let Inst{7-5} = lane{2-0};
2162 def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> {
2163 let Inst{7-6} = lane{1-0};
2165 def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> {
2166 let Inst{7} = lane{0};
2169 def VST3LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
2170 def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
2171 def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
2173 def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> {
2174 let Inst{7-6} = lane{1-0};
2176 def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> {
2177 let Inst{7} = lane{0};
2180 def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
2181 def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
2183 // VST4LN : Vector Store (single 4-element structure from one lane)
// Same shape as VST2LN/VST3LN with a fourth DPR source; note the 32-bit
// forms additionally route Rn{5} into Inst{5}.
2184 class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
2185 : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
2186 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
2187 nohash_imm:$lane), IIC_VST4ln, "vst4", Dt,
2188 "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
2191 let Inst{4} = Rn{4};
2192 let DecoderMethod = "DecodeVST4LN";
2195 def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8"> {
2196 let Inst{7-5} = lane{2-0};
2198 def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> {
2199 let Inst{7-6} = lane{1-0};
2201 def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> {
2202 let Inst{7} = lane{0};
2203 let Inst{5} = Rn{5};
2206 def VST4LNd8Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
2207 def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
2208 def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
2210 // ...with double-spaced registers:
2211 def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> {
2212 let Inst{7-6} = lane{1-0};
2214 def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> {
2215 let Inst{7} = lane{0};
2216 let Inst{5} = Rn{5};
2219 def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
2220 def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
2222 // ...with address register writeback:
2223 class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
2224 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
2225 (ins addrmode6:$Rn, am6offset:$Rm,
2226 DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
2227 IIC_VST4lnu, "vst4", Dt,
2228 "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm",
2229 "$Rn.addr = $wb", []> {
2230 let Inst{4} = Rn{4};
2231 let DecoderMethod = "DecodeVST4LN";
2234 def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8"> {
2235 let Inst{7-5} = lane{2-0};
2237 def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> {
2238 let Inst{7-6} = lane{1-0};
2240 def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> {
2241 let Inst{7} = lane{0};
2242 let Inst{5} = Rn{5};
2245 def VST4LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
2246 def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
2247 def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
2249 def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> {
2250 let Inst{7-6} = lane{1-0};
2252 def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> {
2253 let Inst{7} = lane{0};
2254 let Inst{5} = Rn{5};
2257 def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
2258 def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
2260 } // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
2262 // Use vld1/vst1 for unaligned f64 load / store
// On little-endian targets, element-wise vld1/vst1 (.8/.16) gives the same
// byte layout as a plain f64 access, so it can serve unaligned accesses.
// On big-endian targets only the .64 element size preserves the layout,
// hence the separate IsBE patterns below.
2263 def : Pat<(f64 (hword_alignedload addrmode6:$addr)),
2264 (VLD1d16 addrmode6:$addr)>, Requires<[IsLE]>;
2265 def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr),
2266 (VST1d16 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>;
2267 def : Pat<(f64 (byte_alignedload addrmode6:$addr)),
2268 (VLD1d8 addrmode6:$addr)>, Requires<[IsLE]>;
2269 def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr),
2270 (VST1d8 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>;
2271 def : Pat<(f64 (non_word_alignedload addrmode6:$addr)),
2272 (VLD1d64 addrmode6:$addr)>, Requires<[IsBE]>;
2273 def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr),
2274 (VST1d64 addrmode6:$addr, DPR:$value)>, Requires<[IsBE]>;
2276 //===----------------------------------------------------------------------===//
2277 // NEON pattern fragments
2278 //===----------------------------------------------------------------------===//
2280 // Extract D sub-registers of Q registers.
// These SDNodeXForms map a Q/QQ-relative lane index to the D sub-register
// index that holds that lane (divisor = lanes per D register for the
// element size: 8 for i8, 4 for i16, 2 for i32, 1 for f64).
2281 def DSubReg_i8_reg : SDNodeXForm<imm, [{
2282 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
2283 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, MVT::i32);
2285 def DSubReg_i16_reg : SDNodeXForm<imm, [{
2286 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
2287 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, MVT::i32);
2289 def DSubReg_i32_reg : SDNodeXForm<imm, [{
2290 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
2291 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, MVT::i32);
2293 def DSubReg_f64_reg : SDNodeXForm<imm, [{
2294 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
2295 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), MVT::i32);
2298 // Extract S sub-registers of Q/D registers.
2299 def SSubReg_f32_reg : SDNodeXForm<imm, [{
2300 assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
2301 return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue(), MVT::i32);
2304 // Translate lane numbers from Q registers to D subregs.
// Masks the lane index down to its position within one D register
// (complement of the DSubReg_* transforms above).
2305 def SubReg_i8_lane : SDNodeXForm<imm, [{
2306 return CurDAG->getTargetConstant(N->getZExtValue() & 7, MVT::i32);
2308 def SubReg_i16_lane : SDNodeXForm<imm, [{
2309 return CurDAG->getTargetConstant(N->getZExtValue() & 3, MVT::i32);
2311 def SubReg_i32_lane : SDNodeXForm<imm, [{
2312 return CurDAG->getTargetConstant(N->getZExtValue() & 1, MVT::i32);
2315 //===----------------------------------------------------------------------===//
2316 // Instruction Classes
2317 //===----------------------------------------------------------------------===//
2319 // Basic 2-register operations: double- and quad-register.
// Naming scheme: N2V<D|Q> for SDNode-based ops, N2V<D|Q>Int for intrinsic
// ops, N2VN for narrowing (Q source -> D dest), N2VL for lengthening
// (D source -> Q dest). The op6 bit of the base N2V encoding selects
// D (0) vs Q (1) operation where it is hard-coded.
2320 class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2321 bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
2322 string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
2323 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
2324 (ins DPR:$Vm), IIC_VUNAD, OpcodeStr, Dt,"$Vd, $Vm", "",
2325 [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>;
2326 class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2327 bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
2328 string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
2329 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
2330 (ins QPR:$Vm), IIC_VUNAQ, OpcodeStr, Dt,"$Vd, $Vm", "",
2331 [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>;
2333 // Basic 2-register intrinsics, both double- and quad-register.
2334 class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2335 bits<2> op17_16, bits<5> op11_7, bit op4,
2336 InstrItinClass itin, string OpcodeStr, string Dt,
2337 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2338 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
2339 (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
2340 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
2341 class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2342 bits<2> op17_16, bits<5> op11_7, bit op4,
2343 InstrItinClass itin, string OpcodeStr, string Dt,
2344 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2345 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
2346 (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
2347 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
2349 // Narrow 2-register operations.
2350 class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2351 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
2352 InstrItinClass itin, string OpcodeStr, string Dt,
2353 ValueType TyD, ValueType TyQ, SDNode OpNode>
2354 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
2355 (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
2356 [(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>;
2358 // Narrow 2-register intrinsics.
2359 class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2360 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
2361 InstrItinClass itin, string OpcodeStr, string Dt,
2362 ValueType TyD, ValueType TyQ, SDPatternOperator IntOp>
2363 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
2364 (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
2365 [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>;
2367 // Long 2-register operations (currently only used for VMOVL).
2368 class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2369 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
2370 InstrItinClass itin, string OpcodeStr, string Dt,
2371 ValueType TyQ, ValueType TyD, SDNode OpNode>
2372 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
2373 (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
2374 [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>;
2376 // Long 2-register intrinsics.
2377 class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2378 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
2379 InstrItinClass itin, string OpcodeStr, string Dt,
2380 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
2381 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
2382 (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
2383 [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>;
2385 // 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
// Shuffles write both registers in place: outs = (Vd, Vm) tied to the two
// inputs via the "$src1 = $Vd, $src2 = $Vm" constraint string.
2386 class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
2387 : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$Vd, DPR:$Vm),
2388 (ins DPR:$src1, DPR:$src2), IIC_VPERMD,
2389 OpcodeStr, Dt, "$Vd, $Vm",
2390 "$src1 = $Vd, $src2 = $Vm", []>;
2391 class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
2392 InstrItinClass itin, string OpcodeStr, string Dt>
2393 : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$Vd, QPR:$Vm),
2394 (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$Vd, $Vm",
2395 "$src1 = $Vd, $src2 = $Vm", []>;
2397 // Basic 3-register operations: double- and quad-register.
2398 class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2399 InstrItinClass itin, string OpcodeStr, string Dt,
2400 ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
2401 : N3V<op24, op23, op21_20, op11_8, 0, op4,
2402 (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2403 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
2404 [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
2405 // All of these have a two-operand InstAlias.
2406 let TwoOperandAliasConstraint = "$Vn = $Vd";
2407 let isCommutable = Commutable;
2409 // Same as N3VD but no data type.
2410 class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2411 InstrItinClass itin, string OpcodeStr,
2412 ValueType ResTy, ValueType OpTy,
2413 SDNode OpNode, bit Commutable>
2414 : N3VX<op24, op23, op21_20, op11_8, 0, op4,
2415 (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2416 OpcodeStr, "$Vd, $Vn, $Vm", "",
2417 [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>{
2418 // All of these have a two-operand InstAlias.
2419 let TwoOperandAliasConstraint = "$Vn = $Vd";
2420 let isCommutable = Commutable;
2423 class N3VDSL<bits<2> op21_20, bits<4> op11_8,
2424 InstrItinClass itin, string OpcodeStr, string Dt,
2425 ValueType Ty, SDNode ShOp>
2426 : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
2427 (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2428 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2430 (Ty (ShOp (Ty DPR:$Vn),
2431 (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
2432 // All of these have a two-operand InstAlias.
2433 let TwoOperandAliasConstraint = "$Vn = $Vd";
2434 let isCommutable = 0;
2436 class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
2437 string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
2438 : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
2439 (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2440 NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane","",
2442 (Ty (ShOp (Ty DPR:$Vn),
2443 (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
2444 // All of these have a two-operand InstAlias.
2445 let TwoOperandAliasConstraint = "$Vn = $Vd";
2446 let isCommutable = 0;
// Basic 3-register operation, quad-register.
class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
// Same as N3VQ but no data type.
class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 1, op4,
         (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
         OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>{
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
// Quad-register operation with a scalar (by-lane) second operand,
// 32-bit lanes.
class N3VQSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (ShOp (ResTy QPR:$Vn),
                           (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}
// Same, but with 16-bit lanes; the scalar is restricted to D0-D7.
class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (ShOp (ResTy QPR:$Vn),
                           (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}
// Basic 3-register intrinsics, both double- and quad-register.
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
// Double-register intrinsic with a by-lane scalar operand, 32-bit lanes.
class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (Ty DPR:$Vd),
              (Ty (IntOp (Ty DPR:$Vn),
                         (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
                                           imm:$lane)))))]> {
  let isCommutable = 0;
}
// Same, but with 16-bit lanes; the scalar is restricted to D0-D7.
class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (Ty DPR:$Vd),
              (Ty (IntOp (Ty DPR:$Vn),
                         (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
  let isCommutable = 0;
}
// Intrinsic with the assembly operands swapped ($Vm before $Vn), as used
// by the reverse-shift instructions; never commutable.
class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                Format f, InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn), f, itin,
        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
  let TwoOperandAliasConstraint = "$Vm = $Vd";
  let isCommutable = 0;
}
// Quad-register 3-register intrinsic.
class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
// Quad-register intrinsic with a by-lane scalar operand, 32-bit lanes.
class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (ResTy QPR:$Vn),
                            (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                 imm:$lane)))))]> {
  let isCommutable = 0;
}
// Same, but with 16-bit lanes; the scalar is restricted to D0-D7.
class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (ResTy QPR:$Vn),
                            (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                 imm:$lane)))))]> {
  let isCommutable = 0;
}
// Quad-register intrinsic with swapped assembly operands ($Vm before $Vn).
class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                Format f, InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vm, QPR:$Vn), f, itin,
        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
  let TwoOperandAliasConstraint = "$Vm = $Vd";
  let isCommutable = 0;
}
// Multiply-Add/Sub operations: double- and quad-register.
// The accumulator ($src1) is tied to the destination register.
class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator MulOp, SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
                             (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;

// Multiply-accumulate with a by-lane scalar multiplicand, 32-bit lanes.
class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (Ty DPR:$Vd),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (MulOp DPR:$Vn,
                                   (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
                                                     imm:$lane)))))))]>;
// Same, but with 16-bit lanes; the scalar is restricted to D0-D7.
class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType Ty, SDNode MulOp, SDNode ShOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (Ty DPR:$Vd),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (MulOp DPR:$Vn,
                                   (Ty (NEONvduplane (Ty DPR_8:$Vm),
                                                     imm:$lane)))))))]>;

// Quad-register multiply-add/sub.
class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
                SDPatternOperator MulOp, SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
                             (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
// Quad-register multiply-accumulate with by-lane scalar, 32-bit lanes.
class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
                  SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (ResTy QPR:$Vd),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (MulOp QPR:$Vn,
                                   (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                        imm:$lane)))))))]>;
// Same, but with 16-bit lanes; the scalar is restricted to D0-D7.
class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType ResTy, ValueType OpTy,
                    SDNode MulOp, SDNode ShOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (ResTy QPR:$Vd),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (MulOp QPR:$Vn,
                                   (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                        imm:$lane)))))))]>;
// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
// OpNode combines the accumulator ($src1, tied to $Vd) with the
// intrinsic's result.
class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
                             (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>;
class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
                             (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>;
// Neon 3-argument intrinsics, both double- and quad-register.
// The destination register is also used as the first source operand register.
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$src1),
                                     (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src1),
                                     (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;
// Long Multiply-Add/Sub operations: D-register operands, Q-register
// accumulator/result ($src1 tied to $Vd).
class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
                               (TyQ (MulOp (TyD DPR:$Vn),
                                           (TyD DPR:$Vm)))))]>;
// Long multiply-accumulate with a by-lane scalar, 32-bit lanes.
class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
      (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
      NVMulSLFrm, itin,
      OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
      [(set QPR:$Vd,
        (OpNode (TyQ QPR:$src1),
                (TyQ (MulOp (TyD DPR:$Vn),
                            (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),
                                               imm:$lane))))))]>;
// Same, but with 16-bit lanes; the scalar is restricted to D0-D7.
class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
      (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
      NVMulSLFrm, itin,
      OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
      [(set QPR:$Vd,
        (OpNode (TyQ QPR:$src1),
                (TyQ (MulOp (TyD DPR:$Vn),
                            (TyD (NEONvduplane (TyD DPR_8:$Vm),
                                               imm:$lane))))))]>;
// Long Intrinsic-Op vector operations with explicit extend (VABAL).
class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
                   SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
                               (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                                       (TyD DPR:$Vm)))))))]>;
// Neon Long 3-argument intrinsic. The destination register is
// a quad-register and is also used as the first source operand register.
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
          (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>;
// Long 3-argument intrinsic with a by-lane scalar, 32-bit lanes.
class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (OpTy DPR:$Vn),
                            (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                imm:$lane)))))]>;
// Same, but with 16-bit lanes; the scalar is restricted to D0-D7.
class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (OpTy DPR:$Vn),
                            (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                imm:$lane)))))]>;
// Narrowing 3-register intrinsics: Q-register operands, D-register result.
class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
              SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINi4D,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> {
  let isCommutable = Commutable;
}
// Long 3-register operations: D-register operands, Q-register result.
class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}
// Long operation with a by-lane scalar second operand, 32-bit lanes.
class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set QPR:$Vd,
          (TyQ (OpNode (TyD DPR:$Vn),
                       (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),imm:$lane)))))]>;
// Same, but with 16-bit lanes; the scalar is restricted to D0-D7.
class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set QPR:$Vd,
          (TyQ (OpNode (TyD DPR:$Vn),
                       (TyD (NEONvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>;
// Long 3-register operations with explicitly extended operands.
class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp,
              bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (OpNode (TyQ (ExtOp (TyD DPR:$Vn))),
                               (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics with explicit extend (VABDL).
class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                 InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
                 bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                               (TyD DPR:$Vm))))))]> {
  let isCommutable = Commutable;
}
// Long 3-register intrinsics.
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}
// Long intrinsic with a by-lane scalar second operand, 32-bit lanes.
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (OpTy DPR:$Vn),
                            (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                imm:$lane)))))]>;
// Same, but with 16-bit lanes; the scalar is restricted to D0-D7.
class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (OpTy DPR:$Vn),
                            (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                imm:$lane)))))]>;
// Wide 3-register operations: Q-register first operand, extended D-register
// second operand.
class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
           SDNode OpNode, SDNode ExtOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VSUBiD,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$Vn),
                               (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
// Pairwise long 2-register intrinsics, both double- and quad-register.
class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Pairwise long 2-register accumulate intrinsics,
// both double- and quad-register.
// The destination register is also used as the first source operand register.
class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vm), IIC_VPALiD,
        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>;
class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vm), IIC_VPALiQ,
        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>;
// Shift by immediate,
// both double- and quad-register.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, Operand ImmTy,
             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), f, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>;
class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, Operand ImmTy,
             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), f, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;
} // TwoOperandAliasConstraint = "$Vm = $Vd"
// Long shift by immediate: D-register operand, Q-register result.
class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs QPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), N2RegVShLFrm,
           IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm),
                                         (i32 imm:$SIMM))))]>;

// Narrow shift by immediate: Q-register operand, D-register result.
class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs DPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm),
                                         (i32 imm:$SIMM))))]>;
// Shift right by immediate and accumulate,
// both double- and quad-register.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd, (Ty (add DPR:$src1,
                                (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>;
class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd, (Ty (add QPR:$src1,
                                (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;
} // TwoOperandAliasConstraint = "$Vm = $Vd"
// Shift by immediate and insert,
// both double- and quad-register.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f, string OpcodeStr, string Dt,
                ValueType Ty,SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd, (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>;
class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f, string OpcodeStr, string Dt,
                ValueType Ty,SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiQ,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;
} // TwoOperandAliasConstraint = "$Vm = $Vd"
// Convert, with fractional bits immediate,
// both double- and quad-register.
class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              SDPatternOperator IntOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd), (ins DPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
           IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              SDPatternOperator IntOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd), (ins QPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
           IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;
3026 //===----------------------------------------------------------------------===//
3028 //===----------------------------------------------------------------------===//
3030 // Abbreviations used in multiclass suffixes:
3031 // Q = quarter int (8 bit) elements
3032 // H = half int (16 bit) elements
3033 // S = single int (32 bit) elements
3034 // D = double int (64 bit) elements
3036 // Neon 2-register vector operations and intrinsics.
// Neon 2-register comparisons.
// source operand element sizes of 8, 16 and 32 bits:
multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op4, string opc, string Dt,
                       string asm, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "8"), asm, "",
                  [(set DPR:$Vd, (v8i8 (OpNode (v8i8 DPR:$Vm))))]>;
  def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "16"), asm, "",
                  [(set DPR:$Vd, (v4i16 (OpNode (v4i16 DPR:$Vm))))]>;
  def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "32"), asm, "",
                  [(set DPR:$Vd, (v2i32 (OpNode (v2i32 DPR:$Vm))))]>;
  def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, "f32", asm, "",
                  [(set DPR:$Vd, (v2i32 (OpNode (v2f32 DPR:$Vm))))]> {
    let Inst{10} = 1; // overwrite F = 1
  }

  // 128-bit vector types.
  def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "8"), asm, "",
                  [(set QPR:$Vd, (v16i8 (OpNode (v16i8 QPR:$Vm))))]>;
  def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "16"), asm, "",
                  [(set QPR:$Vd, (v8i16 (OpNode (v8i16 QPR:$Vm))))]>;
  def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "32"), asm, "",
                  [(set QPR:$Vd, (v4i32 (OpNode (v4i32 QPR:$Vm))))]>;
  def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, "f32", asm, "",
                  [(set QPR:$Vd, (v4i32 (OpNode (v4f32 QPR:$Vm))))]> {
    let Inst{10} = 1; // overwrite F = 1
  }
}
// Neon 2-register vector intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                      bits<5> op11_7, bit op4,
                      InstrItinClass itinD, InstrItinClass itinQ,
                      string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "16"),v4i16,v4i16,IntOp>;
  def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "32"),v2i32,v2i32,IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8,v16i8,IntOp>;
  def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "16"),v8i16,v8i16,IntOp>;
  def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "32"),v4i32,v4i32,IntOp>;
}
// Neon Narrowing 2-register vector operations,
// source operand element sizes of 16, 32 and 64 bits:
multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                    bits<5> op11_7, bit op6, bit op4,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    SDNode OpNode> {
  def v8i8  : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, !strconcat(Dt, "16"),
                   v8i8, v8i16, OpNode>;
  def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, !strconcat(Dt, "32"),
                   v4i16, v4i32, OpNode>;
  def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, !strconcat(Dt, "64"),
                   v2i32, v2i64, OpNode>;
}
// Neon Narrowing 2-register vector intrinsics,
// source operand element sizes of 16, 32 and 64 bits:
multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op6, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       SDPatternOperator IntOp> {
  def v8i8  : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "16"),
                      v8i8, v8i16, IntOp>;
  def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "32"),
                      v4i16, v4i32, IntOp>;
  def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "64"),
                      v2i32, v2i64, IntOp>;
}
// Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
// source operand element sizes of 16, 32 and 64 bits:
multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
                    string OpcodeStr, string Dt, SDNode OpNode> {
  def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>;
  def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
  def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
}
// Neon 3-register vector operations.

// First with only element sizes of 8, 16 and 32 bits:
multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                   InstrItinClass itinD16, InstrItinClass itinD32,
                   InstrItinClass itinQ16, InstrItinClass itinQ32,
                   string OpcodeStr, string Dt,
                   SDNode OpNode, bit Commutable = 0> {
  // 64-bit vector types.
  def v8i8  : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i8, v8i8, OpNode, Commutable>;
  def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i16, v4i16, OpNode, Commutable>;
  def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i32, v2i32, OpNode, Commutable>;

  // 128-bit vector types.
  def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v16i8, v16i8, OpNode, Commutable>;
  def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v8i16, v8i16, OpNode, Commutable>;
  def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v4i32, v4i32, OpNode, Commutable>;
}
// Scalar (by-lane) forms, element sizes 16 and 32 bits.
multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
  def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>;
  def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32", v2i32, ShOp>;
  def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>;
  def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32",
                     v4i32, v2i32, ShOp>;
}
// ....then also with element size 64 bits:
multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itinD, InstrItinClass itinQ,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0>
  : N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ,
            OpcodeStr, Dt, OpNode, Commutable> {
  def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
                   OpcodeStr, !strconcat(Dt, "64"),
                   v1i64, v1i64, OpNode, Commutable>;
  def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
                   OpcodeStr, !strconcat(Dt, "64"),
                   v2i64, v2i64, OpNode, Commutable>;
}
// Neon 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                     InstrItinClass itinD16, InstrItinClass itinD32,
                     InstrItinClass itinQ16, InstrItinClass itinQ32,
                     string OpcodeStr, string Dt,
                     SDPatternOperator IntOp, bit Commutable = 0> {
  // 64-bit vector types.
  def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i16, v4i16, IntOp, Commutable>;
  def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i32, v2i32, IntOp, Commutable>;

  // 128-bit vector types.
  def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v8i16, v8i16, IntOp, Commutable>;
  def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i32, v4i32, IntOp, Commutable>;
}
// Same as N3VInt_HS but using the "shifted-register" instruction classes.
multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
                        OpcodeStr, !strconcat(Dt, "16"),
                        v4i16, v4i16, IntOp>;
  def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                        OpcodeStr, !strconcat(Dt, "16"),
                        v8i16, v8i16, IntOp>;
  def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v4i32, v4i32, IntOp>;
}
// By-lane (scalar) intrinsic forms, element sizes 16 and 32 bits.
multiclass N3VIntSL_HS<bits<4> op11_8,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
                          OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>;
  def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>;
  def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
                          OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
  def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>;
}
// ....then also with element size of 8 bits:
multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                      InstrItinClass itinD16, InstrItinClass itinD32,
                      InstrItinClass itinQ16, InstrItinClass itinQ32,
                      string OpcodeStr, string Dt,
                      SDPatternOperator IntOp, bit Commutable = 0>
  : N3VInt_HS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
              OpcodeStr, Dt, IntOp, Commutable> {
  def v8i8  : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i8, v8i8, IntOp, Commutable>;
  def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v16i8, v16i8, IntOp, Commutable>;
}
// 8-bit extension of N3VInt_HSSh (shifted-register form).
multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt,
                        SDPatternOperator IntOp>
  : N3VInt_HSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
                OpcodeStr, Dt, IntOp> {
  def v8i8  : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
                        OpcodeStr, !strconcat(Dt, "8"),
                        v8i8, v8i8, IntOp>;
  def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                        OpcodeStr, !strconcat(Dt, "8"),
                        v16i8, v16i8, IntOp>;
}
// ....then also with element size of 64 bits:
multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0>
  : N3VInt_QHS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
               OpcodeStr, Dt, IntOp, Commutable> {
  def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v1i64, v1i64, IntOp, Commutable>;
  def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v2i64, v2i64, IntOp, Commutable>;
}
// 64-bit extension of N3VInt_QHSSh (shifted-register form).
multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt,
                         SDPatternOperator IntOp>
  : N3VInt_QHSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
                 OpcodeStr, Dt, IntOp> {
  def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
                        OpcodeStr, !strconcat(Dt, "64"),
                        v1i64, v1i64, IntOp>;
  def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                        OpcodeStr, !strconcat(Dt, "64"),
                        v2i64, v2i64, IntOp>;
}
// Neon Narrowing 3-register vector intrinsics,
//   source operand element sizes of 16, 32 and 64 bits:
multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0> {
  def v8i8  : N3VNInt<op24, op23, 0b00, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v8i8, v8i16, IntOp, Commutable>;
  def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i16, v4i32, IntOp, Commutable>;
  def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v2i32, v2i64, IntOp, Commutable>;
}
// Neon Long 3-register vector operations.

multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itin16, InstrItinClass itin32,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0> {
  def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode, Commutable>;
  def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode, Commutable>;
  def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode, Commutable>;
}
// Long by-lane (scalar) forms, element sizes 16 and 32 bits.
multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
                     InstrItinClass itin, string OpcodeStr, string Dt,
                     SDNode OpNode> {
  def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr,
                       !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
  def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr,
                     !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
}
// Long operations matched via an explicit extend of the narrower operands.
multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i64, v2i32, OpNode, ExtOp, Commutable>;
}
// Neon Long 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itin16, InstrItinClass itin32,
                      string OpcodeStr, string Dt,
                      SDPatternOperator IntOp, bit Commutable = 0> {
  def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i32, v4i16, IntOp, Commutable>;
  def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i64, v2i32, IntOp, Commutable>;
}
// Long by-lane (scalar) intrinsic forms, element sizes 16 and 32 bits.
multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
                        InstrItinClass itin, string OpcodeStr, string Dt,
                        SDPatternOperator IntOp> {
  def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
                          OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}
// ....then also with element size of 8 bits:
multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0>
  : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt,
               IntOp, Commutable> {
  def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i16, v8i8, IntOp, Commutable>;
}
// ....with explicit extend (VABDL).
multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                          InstrItinClass itin, string OpcodeStr, string Dt,
                          SDPatternOperator IntOp, SDNode ExtOp,
                          bit Commutable = 0> {
  def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "8"),
                         v8i16, v8i8, IntOp, ExtOp, Commutable>;
  def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "16"),
                         v4i32, v4i16, IntOp, ExtOp, Commutable>;
  def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "32"),
                         v2i64, v2i32, IntOp, ExtOp, Commutable>;
}
// Neon Wide 3-register vector intrinsics,
//   source operand element sizes of 8, 16 and 32 bits:
multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode, ExtOp, Commutable>;
}
// Neon Multiply-Op vector operations,
//   element sizes of 8, 16 and 32 bits:
multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>;
  def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>;
  def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>;
  def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>;
  def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>;
}
// Multiply-Op by-lane (scalar) forms, element sizes 16 and 32 bits.
multiclass N3VMulOpSL_HS<bits<4> op11_8,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt, SDNode ShOp> {
  def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
                            OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
  def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
                          OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>;
  def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
                            OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16,
                            mul, ShOp>;
  def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
                          OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32,
                          mul, ShOp>;
}
// Neon Intrinsic-Op vector operations,
//   element sizes of 8, 16 and 32 bits:
multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD, InstrItinClass itinQ,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp,
                        SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>;
  def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>;
  def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>;
  def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>;
  def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>;
}
// Neon 3-argument intrinsics,
//   element sizes of 8, 16 and 32 bits:
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itinD, InstrItinClass itinQ,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD,
                       OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD,
                       OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
  def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD,
                       OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ,
                       OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
  def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ,
                       OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
  def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ,
                       OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
}
// Neon Long Multiply-Op vector operations,
//   element sizes of 8, 16 and 32 bits:
multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                         InstrItinClass itin16, InstrItinClass itin32,
                         string OpcodeStr, string Dt, SDNode MulOp,
                         SDNode OpNode> {
  def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
                        !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>;
  def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
                        !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
  def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
                        !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
}
// Long Multiply-Op by-lane (scalar) forms, element sizes 16 and 32 bits.
multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
                          string Dt, SDNode MulOp, SDNode OpNode> {
  def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr,
                            !strconcat(Dt,"16"), v4i32, v4i16, MulOp, OpNode>;
  def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr,
                          !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
}
// Neon Long 3-argument intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16,
                       OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32,
                       OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}
// Long 3-argument by-lane (scalar) intrinsic forms.
multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
                         string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
                           OpcodeStr, !strconcat(Dt,"16"), v4i32, v4i16, IntOp>;
  def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}
// ....then also with element size of 8 bits:
multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itin16, InstrItinClass itin32,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp>
  : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> {
  def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16,
                       OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
}
// ....with explicit extend (VABAL).
multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                            InstrItinClass itin, string OpcodeStr, string Dt,
                            SDPatternOperator IntOp, SDNode ExtOp,
                            SDNode OpNode> {
  def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8,
                           IntOp, ExtOp, OpNode>;
  def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16,
                           IntOp, ExtOp, OpNode>;
  def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32,
                           IntOp, ExtOp, OpNode>;
}
// Neon Pairwise long 2-register intrinsics,
//   element sizes of 8, 16 and 32 bits:
multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                        bits<5> op11_7, bit op4,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}
// Neon Pairwise long 2-register accumulate intrinsics,
//   element sizes of 8, 16 and 32 bits:
multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                         bits<5> op11_7, bit op4,
                         string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}
// Neon 2-register vector shift by immediate,
//   with f of either N2RegVShLFrm or N2RegVShRFrm
//   element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
}
// Right-shift variant: uses size-limited shr_imm* operands instead of i32imm.
multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       string baseOpc, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
}
// Neon Shift-Accumulate vector operations,
//   element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         string OpcodeStr, string Dt, SDNode ShOp> {
  // 64-bit vector types.
  def v8i8  : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
                        OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;

  // 128-bit vector types.
  def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
                        OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
}
// Neon Shift-Insert vector operations,
//   with f of either N2RegVShLFrm or N2RegVShRFrm
//   element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr> {
  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsli> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsli> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsli> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsli>;

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsli> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsli> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsli> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsli>;
}
// Right-shift-insert variant (VSRI): shr_imm* operands, NEONvsri node.
multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr> {
  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8,
                        N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsri> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16,
                        N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsri> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32,
                        N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsri> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64,
                        N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsri>;

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8,
                        N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsri> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16,
                        N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsri> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32,
                        N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsri> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64,
                        N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsri>;
}
// Neon Shift Long operations,
//   element sizes of 8, 16, 32 bits:
multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, string OpcodeStr, string Dt, SDNode OpNode> {
  def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
                     OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7,
                     OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
                     OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15,
                     OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
                     OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31,
                     OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
}
// Neon Shift Narrow operations,
//   element sizes of 16, 32, 64 bits:
multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
                      SDNode OpNode> {
  def v8i8 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                    OpcodeStr, !strconcat(Dt, "16"),
                    v8i8, v8i16, shr_imm8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "32"),
                     v4i16, v4i32, shr_imm16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "64"),
                     v2i32, v2i64, shr_imm32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
}
//===----------------------------------------------------------------------===//
// Instruction Definitions.
//===----------------------------------------------------------------------===//

// Vector Add Operations.

// VADD : Vector Add (integer and floating-point)
defm VADD    : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i",
                        add, 1>;
def  VADDfd  : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
                    v2f32, v2f32, fadd, 1>;
def  VADDfq  : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
                    v4f32, v4f32, fadd, 1>;
// VADDL : Vector Add Long (Q = D + D)
defm VADDLs  : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
                           "vaddl", "s", add, sext, 1>;
defm VADDLu  : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
                           "vaddl", "u", add, zext, 1>;
// VADDW : Vector Add Wide (Q = Q + D)
defm VADDWs  : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
defm VADDWu  : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>;
// VHADD : Vector Halving Add
defm VHADDs  : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
                          IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                          "vhadd", "s", int_arm_neon_vhadds, 1>;
defm VHADDu  : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
                          IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                          "vhadd", "u", int_arm_neon_vhaddu, 1>;
// VRHADD : Vector Rounding Halving Add
defm VRHADDs : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
                          IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                          "vrhadd", "s", int_arm_neon_vrhadds, 1>;
defm VRHADDu : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
                          IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                          "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
// VQADD : Vector Saturating Add
defm VQADDs  : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vqadd", "s", int_arm_neon_vqadds, 1>;
defm VQADDu  : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vqadd", "u", int_arm_neon_vqaddu, 1>;
// VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q)
defm VADDHN  : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i",
                           int_arm_neon_vaddhn, 1>;
// VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
                           int_arm_neon_vraddhn, 1>;
// Vector Multiply Operations.

// VMUL : Vector Multiply (integer, polynomial and floating-point)
defm VMUL    : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
                       IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
def  VMULpd  : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul",
                       "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
def  VMULpq  : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul",
                       "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
def  VMULfd  : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
                    v2f32, v2f32, fmul, 1>;
def  VMULfq  : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
                    v4f32, v4f32, fmul, 1>;
defm VMULsl  : N3VSL_HS<0b1000, "vmul", mul>;
def  VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
def  VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
                       fmul>;

// Fold a Q-register multiply by a duplicated lane into the by-scalar form,
// extracting the D subregister that holds the lane.
def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
                      (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
          (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
                              (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
                      (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
          (v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
                              (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
                       (v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))),
          (v4f32 (VMULslfq (v4f32 QPR:$src1),
                           (v2f32 (EXTRACT_SUBREG QPR:$src2,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>;
// VQDMULH : Vector Saturating Doubling Multiply Returning High Half
defm VQDMULH  : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
                          IIC_VMULi16Q, IIC_VMULi32Q,
                          "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
                            IIC_VMULi16Q, IIC_VMULi32Q,
                            "vqdmulh", "s", int_arm_neon_vqdmulh>;
// Fold a multiply by a duplicated lane into the by-scalar form.
def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
                                       (v8i16 (NEONvduplane (v8i16 QPR:$src2),
                                                            imm:$lane)))),
          (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
                                 (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                         (DSubReg_i16_reg imm:$lane))),
                                 (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
                                       (v4i32 (NEONvduplane (v4i32 QPR:$src2),
                                                            imm:$lane)))),
          (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
                                 (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                         (DSubReg_i32_reg imm:$lane))),
                                 (SubReg_i32_lane imm:$lane)))>;

// VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
defm VQRDMULH   : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
                            IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q,
                            "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
                              IIC_VMULi16Q, IIC_VMULi32Q,
                              "vqrdmulh", "s", int_arm_neon_vqrdmulh>;
def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
                                        (v8i16 (NEONvduplane (v8i16 QPR:$src2),
                                                             imm:$lane)))),
          (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
                                  (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i16_reg imm:$lane))),
                                  (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
                                        (v4i32 (NEONvduplane (v4i32 QPR:$src2),
                                                             imm:$lane)))),
          (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
                                  (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i32_reg imm:$lane))),
                                  (SubReg_i32_lane imm:$lane)))>;
// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D)
defm VMULLs  : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
                        "vmull", "s", NEONvmulls, 1>;
defm VMULLu  : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
                        "vmull", "u", NEONvmullu, 1>;
def  VMULLp  : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
                       v8i16, v8i8, int_arm_neon_vmullp, 1>;
defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>;
defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>;

// VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D)
defm VQDMULL  : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
                           "vqdmull", "s", int_arm_neon_vqdmull, 1>;
defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
                             "vqdmull", "s", int_arm_neon_vqdmull>;
// Vector Multiply-Accumulate and Multiply-Subtract Operations.

// VMLA : Vector Multiply Accumulate (integer and floating-point)
defm VMLA    : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                            IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def  VMLAfd  : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
                         v2f32, fmul_su, fadd_mlx>,
               Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
def  VMLAfq  : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
                         v4f32, fmul_su, fadd_mlx>,
               Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
defm VMLAsl  : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def  VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
                            v2f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
                            v4f32, v2f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;

// Fold multiply-accumulate by a duplicated lane into the by-scalar form.
def : Pat<(v8i16 (add (v8i16 QPR:$src1),
                  (mul (v8i16 QPR:$src2),
                       (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
          (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (add (v4i32 QPR:$src1),
                  (mul (v4i32 QPR:$src2),
                       (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
          (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;

def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1),
                  (fmul_su (v4f32 QPR:$src2),
                           (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
          (v4f32 (VMLAslfq (v4f32 QPR:$src1),
                           (v4f32 QPR:$src2),
                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>,
      Requires<[HasNEON, UseFPVMLx]>;

// VMLAL : Vector Multiply Accumulate Long (Q += D * D)
defm VMLALs  : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
                             "vmlal", "s", NEONvmulls, add>;
defm VMLALu  : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
                             "vmlal", "u", NEONvmullu, add>;

defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>;
defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>;

// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
defm VQDMLAL  : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                            "vqdmlal", "s", int_arm_neon_vqdmlal>;
defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", int_arm_neon_vqdmlal>;
// VMLS : Vector Multiply Subtract (integer and floating-point)
defm VMLS    : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                            IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def  VMLSfd  : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
                         v2f32, fmul_su, fsub_mlx>,
               Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
def  VMLSfq  : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
                         v4f32, fmul_su, fsub_mlx>,
               Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
defm VMLSsl  : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def  VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
                            v2f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
                            v4f32, v2f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;

// Fold multiply-subtract by a duplicated lane into the by-scalar form.
def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
                  (mul (v8i16 QPR:$src2),
                       (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
          (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
                  (mul (v4i32 QPR:$src2),
                       (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
          (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;

def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1),
                  (fmul_su (v4f32 QPR:$src2),
                           (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
          (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2),
                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>,
      Requires<[HasNEON, UseFPVMLx]>;

// VMLSL : Vector Multiply Subtract Long (Q -= D * D)
defm VMLSLs  : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
                             "vmlsl", "s", NEONvmulls, sub>;
defm VMLSLu  : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
                             "vmlsl", "u", NEONvmullu, sub>;

defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>;
defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>;

// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
defm VQDMLSL  : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
                            "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
                       v2f32, fmul_su, fadd_mlx>,
             Requires<[HasVFP4,UseFusedMAC]>;

def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
                       v4f32, fmul_su, fadd_mlx>,
             Requires<[HasVFP4,UseFusedMAC]>;

// Fused Vector Multiply Subtract (floating-point)
def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
                       v2f32, fmul_su, fsub_mlx>,
             Requires<[HasVFP4,UseFusedMAC]>;
def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
                       v4f32, fmul_su, fsub_mlx>,
             Requires<[HasVFP4,UseFusedMAC]>;

// Match @llvm.fma.* intrinsics
def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
          (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasVFP4]>;
def : Pat<(v4f32 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
          (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasVFP4]>;
def : Pat<(v2f32 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)),
          (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasVFP4]>;
def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)),
          (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasVFP4]>;
// Vector Subtract Operations.

// VSUB : Vector Subtract (integer and floating-point)
defm VSUB : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
                     "vsub", "i", sub, 0>;
def  VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
                   v2f32, v2f32, fsub, 0>;
def  VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
                   v4f32, v4f32, fsub, 0>;
// VSUBL : Vector Subtract Long (Q = D - D)
defm VSUBLs : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
                          "vsubl", "s", sub, sext, 0>;
defm VSUBLu : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
                          "vsubl", "u", sub, zext, 0>;
// VSUBW : Vector Subtract Wide (Q = Q - D)
defm VSUBWs : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>;
// VHSUB : Vector Halving Subtract
defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
                         IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                         "vhsub", "s", int_arm_neon_vhsubs, 0>;
defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
                         IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                         "vhsub", "u", int_arm_neon_vhsubu, 0>;
// VQSUB : Vector Saturating Subtract
defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
                          IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                          "vqsub", "s", int_arm_neon_vqsubs, 0>;
defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
                          IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                          "vqsub", "u", int_arm_neon_vqsubu, 0>;
// VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q)
defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i",
                          int_arm_neon_vsubhn, 0>;
// VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
                           int_arm_neon_vrsubhn, 0>;
// Vector Comparisons.

// VCEQ : Vector Compare Equal
defm VCEQ  : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                     IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>;
def  VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
                   NEONvceq, 1>;
def  VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
                   NEONvceq, 1>;

defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
                         "$Vd, $Vm, #0", NEONvceqz>;

// VCGE : Vector Compare Greater Than or Equal
defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                     IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>;
defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                     IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>;
def  VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
                   NEONvcge, 0>;
def  VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
                   NEONvcge, 0>;

defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
                         "$Vd, $Vm, #0", NEONvcgez>;
defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
                         "$Vd, $Vm, #0", NEONvclez>;

// VCGT : Vector Compare Greater Than
defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                     IIC_VSUBi4Q, "vcgt", "s", NEONvcgt, 0>;
defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                     IIC_VSUBi4Q, "vcgt", "u", NEONvcgtu, 0>;
def  VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
                   NEONvcgt, 0>;
def  VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
                   NEONvcgt, 0>;

defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
                         "$Vd, $Vm, #0", NEONvcgtz>;
defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
                         "$Vd, $Vm, #0", NEONvcltz>;

// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
def  VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
                      "f32", v2i32, v2f32, int_arm_neon_vacged, 0>;
def  VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
                      "f32", v4i32, v4f32, int_arm_neon_vacgeq, 0>;
// VACGT : Vector Absolute Compare Greater Than (aka VCAGT)
def  VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
                      "f32", v2i32, v2f32, int_arm_neon_vacgtd, 0>;
def  VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
                      "f32", v4i32, v4f32, int_arm_neon_vacgtq, 0>;
// VTST : Vector Test Bits
defm VTST  : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                     IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;
4281 // Vector Bitwise Operations.
4283 def vnotd : PatFrag<(ops node:$in),
4284 (xor node:$in, (bitconvert (v8i8 NEONimmAllOnesV)))>;
4285 def vnotq : PatFrag<(ops node:$in),
4286 (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>;
4289 // VAND : Vector Bitwise AND
4290 def VANDd : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
4291 v2i32, v2i32, and, 1>;
4292 def VANDq : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand",
4293 v4i32, v4i32, and, 1>;
4295 // VEOR : Vector Bitwise Exclusive OR
4296 def VEORd : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor",
4297 v2i32, v2i32, xor, 1>;
4298 def VEORq : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor",
4299 v4i32, v4i32, xor, 1>;
4301 // VORR : Vector Bitwise OR
4302 def VORRd : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
4303 v2i32, v2i32, or, 1>;
4304 def VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
4305 v4i32, v4i32, or, 1>;
4307 def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
4308 (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
4310 "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
4312 (v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
4313 let Inst{9} = SIMM{9};
4316 def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
4317 (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
4319 "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
4321 (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
4322 let Inst{10-9} = SIMM{10-9};
4325 def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
4326 (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
4328 "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
4330 (v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
4331 let Inst{9} = SIMM{9};
4334 def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
4335 (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
4337 "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
4339 (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
4340 let Inst{10-9} = SIMM{10-9};
// VBIC : Vector Bitwise Bit Clear (AND NOT)
let TwoOperandAliasConstraint = "$Vn = $Vd" in {
def  VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
                  (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
                  "vbic", "$Vd, $Vn, $Vm", "",
                  [(set DPR:$Vd, (v2i32 (and DPR:$Vn,
                                             (vnotd DPR:$Vm))))]>;
def  VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
                  (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
                  "vbic", "$Vd, $Vn, $Vm", "",
                  [(set QPR:$Vd, (v4i32 (and QPR:$Vn,
                                             (vnotq QPR:$Vm))))]>;
}

// VBIC (immediate) : bitwise clear of a modified-immediate splat.
def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
                          (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v4i16 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
                          (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v2i32 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
                          (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v8i16 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
                          (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v4i32 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}
// VORN : Vector Bitwise OR NOT
def  VORNd  : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd),
                   (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
                   "vorn", "$Vd, $Vn, $Vm", "",
                   [(set DPR:$Vd, (v2i32 (or DPR:$Vn,
                                             (vnotd DPR:$Vm))))]>;
def  VORNq  : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd),
                   (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
                   "vorn", "$Vd, $Vn, $Vm", "",
                   [(set QPR:$Vd, (v4i32 (or QPR:$Vn,
                                             (vnotq QPR:$Vm))))]>;

// VMVN : Vector Bitwise NOT (Immediate)

let isReMaterializable = 1 in {

def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmvn", "i16", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v4i16 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmvn", "i16", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v8i16 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmvn", "i32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2i32 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmvn", "i32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4i32 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}
}

// VMVN : Vector Bitwise NOT
def  VMVNd  : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
                   (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD,
                   "vmvn", "$Vd, $Vm", "",
                   [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>;
def  VMVNq  : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
                   (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD,
                   "vmvn", "$Vd, $Vm", "",
                   [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>;
def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;
// VBSL : Vector Bitwise Select
def  VBSLd  : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
                   (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                   N3RegFrm, IIC_VCNTiD,
                   "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                   [(set DPR:$Vd,
                         (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;

// Recognize the open-coded (x & mask) | (y & ~mask) form of select.
def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
                     (and DPR:$Vm, (vnotd DPR:$Vd)))),
          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;

def  VBSLq  : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
                   (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                   N3RegFrm, IIC_VCNTiQ,
                   "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                   [(set QPR:$Vd,
                         (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;

def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;

// VBIF : Vector Bitwise Insert if False
// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
// FIXME: This instruction's encoding MAY NOT BE correct.
def  VBIFd  : N3VX<1, 0, 0b11, 0b0001, 0, 1,
                   (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                   N3RegFrm, IIC_VBINiD,
                   "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                   []>;
def  VBIFq  : N3VX<1, 0, 0b11, 0b0001, 1, 1,
                   (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                   N3RegFrm, IIC_VBINiQ,
                   "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                   []>;

// VBIT : Vector Bitwise Insert if True
// like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
// FIXME: This instruction's encoding MAY NOT BE correct.
def  VBITd  : N3VX<1, 0, 0b10, 0b0001, 0, 1,
                   (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                   N3RegFrm, IIC_VBINiD,
                   "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                   []>;
def  VBITq  : N3VX<1, 0, 0b10, 0b0001, 1, 1,
                   (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                   N3RegFrm, IIC_VBINiQ,
                   "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                   []>;

// VBIT/VBIF are not yet implemented.  The TwoAddress pass will not go looking
// for equivalent operations with different register constraints; it just
// inserts copies.
// Vector Absolute Differences.

// VABD : Vector Absolute Difference
defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vabd", "s", int_arm_neon_vabds, 1>;
defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vabd", "u", int_arm_neon_vabdu, 1>;
def  VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
                      "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
def  VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
                      "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;

// VABDL : Vector Absolute Difference Long (Q = | D - D |)
defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
                             "vabdl", "s", int_arm_neon_vabds, zext, 1>;
defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
                             "vabdl", "u", int_arm_neon_vabdu, zext, 1>;

// VABA : Vector Absolute Difference and Accumulate
defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
                          "vaba", "s", int_arm_neon_vabds, add>;
defm VABAu : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
                          "vaba", "u", int_arm_neon_vabdu, add>;

// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
defm VABALs : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
                               "vabal", "s", int_arm_neon_vabds, zext, add>;
defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
                               "vabal", "u", int_arm_neon_vabdu, zext, add>;
// Vector Maximum and Minimum.

// VMAX : Vector Maximum
defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vmax", "s", int_arm_neon_vmaxs, 1>;
defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vmax", "u", int_arm_neon_vmaxu, 1>;
def  VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
                      "vmax", "f32",
                      v2f32, v2f32, int_arm_neon_vmaxs, 1>;
def  VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                      "vmax", "f32",
                      v4f32, v4f32, int_arm_neon_vmaxs, 1>;

// VMIN : Vector Minimum
defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vmin", "s", int_arm_neon_vmins, 1>;
defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
                        IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                        "vmin", "u", int_arm_neon_vminu, 1>;
def  VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
                      "vmin", "f32",
                      v2f32, v2f32, int_arm_neon_vmins, 1>;
def  VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                      "vmin", "f32",
                      v4f32, v4f32, int_arm_neon_vmins, 1>;
// Vector Pairwise Operations.

// VPADD : Vector Pairwise Add
def  VPADDi8  : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i8",
                        v8i8, v8i8, int_arm_neon_vpadd, 0>;
def  VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i16",
                        v4i16, v4i16, int_arm_neon_vpadd, 0>;
def  VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i32",
                        v2i32, v2i32, int_arm_neon_vpadd, 0>;
def  VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
                        IIC_VPBIND, "vpadd", "f32",
                        v2f32, v2f32, int_arm_neon_vpadd, 0>;

// VPADDL : Vector Pairwise Add Long
defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
                            int_arm_neon_vpaddls>;
defm VPADDLu : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u",
                            int_arm_neon_vpaddlu>;

// VPADAL : Vector Pairwise Add and Accumulate Long
defm VPADALs : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s",
                             int_arm_neon_vpadals>;
defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
                             int_arm_neon_vpadalu>;

// VPMAX : Vector Pairwise Maximum
def  VPMAXs8  : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
def  VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
def  VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
def  VPMAXu8  : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
def  VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
def  VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
def  VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
                        "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;

// VPMIN : Vector Pairwise Minimum
def  VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
def  VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
def  VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
def  VPMINu8  : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
def  VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
def  VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
def  VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
                        "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
// Vector Reciprocal and Reciprocal Square Root Estimate and Step.

// VRECPE : Vector Reciprocal Estimate
def  VRECPEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
                        IIC_VUNAD, "vrecpe", "u32",
                        v2i32, v2i32, int_arm_neon_vrecpe>;
def  VRECPEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
                        IIC_VUNAQ, "vrecpe", "u32",
                        v4i32, v4i32, int_arm_neon_vrecpe>;
def  VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                        IIC_VUNAD, "vrecpe", "f32",
                        v2f32, v2f32, int_arm_neon_vrecpe>;
def  VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                        IIC_VUNAQ, "vrecpe", "f32",
                        v4f32, v4f32, int_arm_neon_vrecpe>;

// VRECPS : Vector Reciprocal Step
def  VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
                        IIC_VRECSD, "vrecps", "f32",
                        v2f32, v2f32, int_arm_neon_vrecps, 1>;
def  VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
                        IIC_VRECSQ, "vrecps", "f32",
                        v4f32, v4f32, int_arm_neon_vrecps, 1>;

// VRSQRTE : Vector Reciprocal Square Root Estimate
def  VRSQRTEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
                         IIC_VUNAD, "vrsqrte", "u32",
                         v2i32, v2i32, int_arm_neon_vrsqrte>;
def  VRSQRTEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
                         IIC_VUNAQ, "vrsqrte", "u32",
                         v4i32, v4i32, int_arm_neon_vrsqrte>;
def  VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                         IIC_VUNAD, "vrsqrte", "f32",
                         v2f32, v2f32, int_arm_neon_vrsqrte>;
def  VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                         IIC_VUNAQ, "vrsqrte", "f32",
                         v4f32, v4f32, int_arm_neon_vrsqrte>;

// VRSQRTS : Vector Reciprocal Square Root Step
def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
                        IIC_VRECSD, "vrsqrts", "f32",
                        v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
                        IIC_VRECSQ, "vrsqrts", "f32",
                        v4f32, v4f32, int_arm_neon_vrsqrts, 1>;
4676 // VSHL : Vector Shift
// Register-controlled shifts (N3RegVShFrm), instantiated for the
// 8/16/32/64-bit element sizes by the N3VInt_QHSDSh multiclass. The
// signed and unsigned variants differ only in the leading template bit
// (0 vs 1) and the intrinsic they lower.
4677 defm VSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
4678 IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
4679 "vshl", "s", int_arm_neon_vshifts>;
4680 defm VSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
4681 IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
4682 "vshl", "u", int_arm_neon_vshiftu>;
4684 // VSHL : Vector Shift Left (Immediate)
4685 defm VSHLi : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>;
4687 // VSHR : Vector Shift Right (Immediate)
4688 defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs",
4690 defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu",
4693 // VSHLL : Vector Shift Left Long
4694 defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>;
4695 defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", NEONvshllu>;
4697 // VSHLL : Vector Shift Left Long (with maximum shift count)
4698 class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
4699 bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
4700 ValueType OpTy, Operand ImmTy, SDNode OpNode>
4701 : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
4702 ResTy, OpTy, ImmTy, OpNode> {
4703 let Inst{21-16} = op21_16;
4704 let DecoderMethod = "DecodeVSHLMaxInstruction";
4706 def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
4707 v8i16, v8i8, imm8, NEONvshlli>;
4708 def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
4709 v4i32, v4i16, imm16, NEONvshlli>;
4710 def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
4711 v2i64, v2i32, imm32, NEONvshlli>;
4713 // VSHRN : Vector Shift Right and Narrow
4714 defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
4717 // VRSHL : Vector Rounding Shift
// Register-controlled rounding shifts; same layout as VSHLs/VSHLu above
// but with opcode field 0b0101 and the rounding-shift intrinsics.
4718 defm VRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
4719 IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
4720 "vrshl", "s", int_arm_neon_vrshifts>;
4721 defm VRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
4722 IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
4723 "vrshl", "u", int_arm_neon_vrshiftu>;
4724 // VRSHR : Vector Rounding Shift Right
4725 defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs",
4727 defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu",
4730 // VRSHRN : Vector Rounding Shift Right and Narrow
4731 defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
4734 // VQSHL : Vector Saturating Shift
// Register-controlled saturating shifts, signed and unsigned.
4735 defm VQSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
4736 IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
4737 "vqshl", "s", int_arm_neon_vqshifts>;
4738 defm VQSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
4739 IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
4740 "vqshl", "u", int_arm_neon_vqshiftu>;
4741 // VQSHL : Vector Saturating Shift Left (Immediate)
// Immediate forms, built by N2VShL_QHSD from the NEONvqshl* DAG nodes.
4742 defm VQSHLsi : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls>;
4743 defm VQSHLui : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu>;
4745 // VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned)
// vqshlu takes a signed source ("s" data type) and produces an unsigned
// saturated result, hence the NEONvqshlsu node.
4746 defm VQSHLsu : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu>;
4748 // VQSHRN : Vector Saturating Shift Right and Narrow
4749 defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
4751 defm VQSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
4754 // VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned)
4755 defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
4758 // VQRSHL : Vector Saturating Rounding Shift
// Register-controlled shifts combining rounding and saturation; same
// shape as VQSHLs/VQSHLu above with opcode field 0b0101.
4759 defm VQRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
4760 IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
4761 "vqrshl", "s", int_arm_neon_vqrshifts>;
4762 defm VQRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
4763 IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
4764 "vqrshl", "u", int_arm_neon_vqrshiftu>;
4766 // VQRSHRN : Vector Saturating Rounding Shift Right and Narrow
4767 defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
4769 defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
4772 // VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
4773 defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
4776 // VSRA : Vector Shift Right and Accumulate
// Immediate shift-right-and-accumulate; N2VShAdd_QHSD instantiates D and
// Q forms for each element size from the NEONvshr*/NEONvrshr* nodes.
4777 defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>;
4778 defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshru>;
4779 // VRSRA : Vector Rounding Shift Right and Accumulate
4780 defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>;
4781 defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>;
4783 // VSLI : Vector Shift Left and Insert
// The insert variants have no DAG pattern here; they are selected via
// their own multiclasses (N2VShInsL_QHSD / N2VShInsR_QHSD).
4784 defm VSLI : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">;
4786 // VSRI : Vector Shift Right and Insert
4787 defm VSRI : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">;
4789 // Vector Absolute and Saturating Absolute.
4791 // VABS : Vector Absolute Value
4792 defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
4793 IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s",
4795 def VABSfd : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
4796 IIC_VUNAD, "vabs", "f32",
4797 v2f32, v2f32, int_arm_neon_vabs>;
4798 def VABSfq : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
4799 IIC_VUNAQ, "vabs", "f32",
4800 v4f32, v4f32, int_arm_neon_vabs>;
4802 // VQABS : Vector Saturating Absolute Value
4803 defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
4804 IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s",
4805 int_arm_neon_vqabs>;
// vnegd/vnegq match integer negation written as a subtraction from a
// bitconverted all-zeros vector, i.e. (0 - x).
4809 def vnegd : PatFrag<(ops node:$in),
4810 (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>;
4811 def vnegq : PatFrag<(ops node:$in),
4812 (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>;
// D- and Q-register vneg instruction classes, parameterized by element
// size and vector type; each folds the vnegd/vnegq fragment directly.
4814 class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
4815 : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm),
4816 IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
4817 [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>;
4818 class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
4819 : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$Vd), (ins QPR:$Vm),
4820 IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "",
4821 [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>;
4823 // VNEG : Vector Negate (integer)
4824 def VNEGs8d : VNEGD<0b00, "vneg", "s8", v8i8>;
4825 def VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
4826 def VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
4827 def VNEGs8q : VNEGQ<0b00, "vneg", "s8", v16i8>;
4828 def VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
4829 def VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;
4831 // VNEG : Vector Negate (floating-point)
// The FP forms select directly on the generic fneg node.
4832 def VNEGfd : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
4833 (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
4834 "vneg", "f32", "$Vd, $Vm", "",
4835 [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>;
4836 def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
4837 (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
4838 "vneg", "f32", "$Vd, $Vm", "",
4839 [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;
// Explicit patterns mapping the vnegd/vnegq fragments at each integer
// vector type onto the corresponding instruction definition.
4841 def : Pat<(v8i8 (vnegd DPR:$src)), (VNEGs8d DPR:$src)>;
4842 def : Pat<(v4i16 (vnegd DPR:$src)), (VNEGs16d DPR:$src)>;
4843 def : Pat<(v2i32 (vnegd DPR:$src)), (VNEGs32d DPR:$src)>;
4844 def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>;
4845 def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
4846 def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;
4848 // VQNEG : Vector Saturating Negate
4849 defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
4850 IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
4851 int_arm_neon_vqneg>;
4853 // Vector Bit Counting Operations.
4855 // VCLS : Vector Count Leading Sign Bits
4856 defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
4857 IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
4859 // VCLZ : Vector Count Leading Zeros
4860 defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
4861 IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
4863 // VCNT : Vector Count One Bits
4864 def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
4865 IIC_VCNTiD, "vcnt", "8",
4867 def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
4868 IIC_VCNTiQ, "vcnt", "8",
4869 v16i8, v16i8, ctpop>;
4872 def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
4873 (outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2),
4874 NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
4876 def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
4877 (outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2),
4878 NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
4881 // Vector Move Operations.
4883 // VMOV : Vector Move (Register)
4884 def : InstAlias<"vmov${p} $Vd, $Vm",
4885 (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
4886 def : InstAlias<"vmov${p} $Vd, $Vm",
4887 (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
4889 // VMOV : Vector Move (Immediate)
4891 let isReMaterializable = 1 in {
4892 def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd),
4893 (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
4894 "vmov", "i8", "$Vd, $SIMM", "",
4895 [(set DPR:$Vd, (v8i8 (NEONvmovImm timm:$SIMM)))]>;
4896 def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd),
4897 (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
4898 "vmov", "i8", "$Vd, $SIMM", "",
4899 [(set QPR:$Vd, (v16i8 (NEONvmovImm timm:$SIMM)))]>;
4901 def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd),
4902 (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
4903 "vmov", "i16", "$Vd, $SIMM", "",
4904 [(set DPR:$Vd, (v4i16 (NEONvmovImm timm:$SIMM)))]> {
4905 let Inst{9} = SIMM{9};
4908 def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd),
4909 (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
4910 "vmov", "i16", "$Vd, $SIMM", "",
4911 [(set QPR:$Vd, (v8i16 (NEONvmovImm timm:$SIMM)))]> {
4912 let Inst{9} = SIMM{9};
4915 def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd),
4916 (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
4917 "vmov", "i32", "$Vd, $SIMM", "",
4918 [(set DPR:$Vd, (v2i32 (NEONvmovImm timm:$SIMM)))]> {
4919 let Inst{11-8} = SIMM{11-8};
4922 def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd),
4923 (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
4924 "vmov", "i32", "$Vd, $SIMM", "",
4925 [(set QPR:$Vd, (v4i32 (NEONvmovImm timm:$SIMM)))]> {
4926 let Inst{11-8} = SIMM{11-8};
4929 def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd),
4930 (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
4931 "vmov", "i64", "$Vd, $SIMM", "",
4932 [(set DPR:$Vd, (v1i64 (NEONvmovImm timm:$SIMM)))]>;
4933 def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd),
4934 (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
4935 "vmov", "i64", "$Vd, $SIMM", "",
4936 [(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>;
4938 def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd),
4939 (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
4940 "vmov", "f32", "$Vd, $SIMM", "",
4941 [(set DPR:$Vd, (v2f32 (NEONvmovFPImm timm:$SIMM)))]>;
4942 def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd),
4943 (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
4944 "vmov", "f32", "$Vd, $SIMM", "",
4945 [(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>;
4946 } // isReMaterializable
4948 // VMOV : Vector Get Lane (move scalar to ARM core register)
4950 def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
4951 (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
4952 IIC_VMOVSI, "vmov", "s8", "$R, $V$lane",
4953 [(set GPR:$R, (NEONvgetlanes (v8i8 DPR:$V),
4955 let Inst{21} = lane{2};
4956 let Inst{6-5} = lane{1-0};
4958 def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
4959 (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
4960 IIC_VMOVSI, "vmov", "s16", "$R, $V$lane",
4961 [(set GPR:$R, (NEONvgetlanes (v4i16 DPR:$V),
4963 let Inst{21} = lane{1};
4964 let Inst{6} = lane{0};
4966 def VGETLNu8 : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
4967 (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
4968 IIC_VMOVSI, "vmov", "u8", "$R, $V$lane",
4969 [(set GPR:$R, (NEONvgetlaneu (v8i8 DPR:$V),
4971 let Inst{21} = lane{2};
4972 let Inst{6-5} = lane{1-0};
4974 def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
4975 (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
4976 IIC_VMOVSI, "vmov", "u16", "$R, $V$lane",
4977 [(set GPR:$R, (NEONvgetlaneu (v4i16 DPR:$V),
4979 let Inst{21} = lane{1};
4980 let Inst{6} = lane{0};
4982 def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
4983 (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane),
4984 IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
4985 [(set GPR:$R, (extractelt (v2i32 DPR:$V),
4987 let Inst{21} = lane{0};
4989 // def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
4990 def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane),
4991 (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
4992 (DSubReg_i8_reg imm:$lane))),
4993 (SubReg_i8_lane imm:$lane))>;
4994 def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane),
4995 (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
4996 (DSubReg_i16_reg imm:$lane))),
4997 (SubReg_i16_lane imm:$lane))>;
4998 def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane),
4999 (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
5000 (DSubReg_i8_reg imm:$lane))),
5001 (SubReg_i8_lane imm:$lane))>;
5002 def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane),
5003 (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
5004 (DSubReg_i16_reg imm:$lane))),
5005 (SubReg_i16_lane imm:$lane))>;
5006 def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
5007 (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
5008 (DSubReg_i32_reg imm:$lane))),
5009 (SubReg_i32_lane imm:$lane))>;
5010 def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
5011 (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)),
5012 (SSubReg_f32_reg imm:$src2))>;
5013 def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
5014 (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)),
5015 (SSubReg_f32_reg imm:$src2))>;
5016 //def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
5017 // (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
5018 def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
5019 (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
5022 // VMOV : Vector Set Lane (move ARM core register to scalar)
5024 let Constraints = "$src1 = $V" in {
5025 def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V),
5026 (ins DPR:$src1, GPR:$R, VectorIndex8:$lane),
5027 IIC_VMOVISL, "vmov", "8", "$V$lane, $R",
5028 [(set DPR:$V, (vector_insert (v8i8 DPR:$src1),
5029 GPR:$R, imm:$lane))]> {
5030 let Inst{21} = lane{2};
5031 let Inst{6-5} = lane{1-0};
5033 def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V),
5034 (ins DPR:$src1, GPR:$R, VectorIndex16:$lane),
5035 IIC_VMOVISL, "vmov", "16", "$V$lane, $R",
5036 [(set DPR:$V, (vector_insert (v4i16 DPR:$src1),
5037 GPR:$R, imm:$lane))]> {
5038 let Inst{21} = lane{1};
5039 let Inst{6} = lane{0};
5041 def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
5042 (ins DPR:$src1, GPR:$R, VectorIndex32:$lane),
5043 IIC_VMOVISL, "vmov", "32", "$V$lane, $R",
5044 [(set DPR:$V, (insertelt (v2i32 DPR:$src1),
5045 GPR:$R, imm:$lane))]> {
5046 let Inst{21} = lane{0};
5049 def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
5050 (v16i8 (INSERT_SUBREG QPR:$src1,
5051 (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
5052 (DSubReg_i8_reg imm:$lane))),
5053 GPR:$src2, (SubReg_i8_lane imm:$lane))),
5054 (DSubReg_i8_reg imm:$lane)))>;
5055 def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
5056 (v8i16 (INSERT_SUBREG QPR:$src1,
5057 (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
5058 (DSubReg_i16_reg imm:$lane))),
5059 GPR:$src2, (SubReg_i16_lane imm:$lane))),
5060 (DSubReg_i16_reg imm:$lane)))>;
5061 def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
5062 (v4i32 (INSERT_SUBREG QPR:$src1,
5063 (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
5064 (DSubReg_i32_reg imm:$lane))),
5065 GPR:$src2, (SubReg_i32_lane imm:$lane))),
5066 (DSubReg_i32_reg imm:$lane)))>;
5068 def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
5069 (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
5070 SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
5071 def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
5072 (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
5073 SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
5075 //def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
5076 // (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
5077 def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
5078 (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
5080 def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
5081 (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
5082 def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
5083 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
5084 def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
5085 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
5087 def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
5088 (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
5089 def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
5090 (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
5091 def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
5092 (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
5094 def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
5095 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
5096 (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
5098 def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
5099 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
5100 (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
5102 def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
5103 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
5104 (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
5107 // VDUP : Vector Duplicate (from ARM core register to all elements)
// Broadcast a GPR value to every element of a D or Q register,
// selecting on the NEONvdup node.
5109 class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
5110 : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R),
5111 IIC_VMOVIS, "vdup", Dt, "$V, $R",
5112 [(set DPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
5113 class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
5114 : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R),
5115 IIC_VMOVIS, "vdup", Dt, "$V, $R",
5116 [(set QPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
5118 def VDUP8d : VDUPD<0b11101100, 0b00, "8", v8i8>;
5119 def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>;
5120 def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>;
5121 def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>;
5122 def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>;
5123 def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>;
// An f32 value sitting in a GPR (via bitconvert) reuses the 32-bit
// integer dup instructions.
5125 def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>;
5126 def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>;
5128 // VDUP : Vector Duplicate Lane (from scalar to all elements)
// Broadcast one lane of a D register to all elements of the result.
// Note the Q-register form also reads a D-register source ($Vm is DPR);
// OpTy is the D source type and ResTy the widened Q result type.
5130 class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
5131 ValueType Ty, Operand IdxTy>
5132 : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
5133 IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane",
5134 [(set DPR:$Vd, (Ty (NEONvduplane (Ty DPR:$Vm), imm:$lane)))]>;
5136 class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
5137 ValueType ResTy, ValueType OpTy, Operand IdxTy>
5138 : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
5139 IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane",
5140 [(set QPR:$Vd, (ResTy (NEONvduplane (OpTy DPR:$Vm),
5141 VectorIndex32:$lane)))]>;
5143 // Inst{19-16} is partially specified depending on the element size.
5145 def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> {
5147 let Inst{19-17} = lane{2-0};
5149 def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> {
5151 let Inst{19-18} = lane{1-0};
5153 def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> {
5155 let Inst{19} = lane{0};
5157 def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8, VectorIndex8> {
5159 let Inst{19-17} = lane{2-0};
5161 def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16, VectorIndex16> {
5163 let Inst{19-18} = lane{1-0};
5165 def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> {
5167 let Inst{19} = lane{0};
// f32 lane-dup reuses the 32-bit integer lane-dup instructions.
5170 def : Pat<(v2f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
5171 (VDUPLN32d DPR:$Vm, imm:$lane)>;
5173 def : Pat<(v4f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
5174 (VDUPLN32q DPR:$Vm, imm:$lane)>;
// Lane-dup from a Q-register source: extract the D subregister that
// contains the lane, then dup from the lane index within that D register.
5176 def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
5177 (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
5178 (DSubReg_i8_reg imm:$lane))),
5179 (SubReg_i8_lane imm:$lane)))>;
5180 def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)),
5181 (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
5182 (DSubReg_i16_reg imm:$lane))),
5183 (SubReg_i16_lane imm:$lane)))>;
5184 def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)),
5185 (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
5186 (DSubReg_i32_reg imm:$lane))),
5187 (SubReg_i32_lane imm:$lane)))>;
5188 def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
5189 (v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src,
5190 (DSubReg_i32_reg imm:$lane))),
5191 (SubReg_i32_lane imm:$lane)))>;
// Pseudo-instructions for duplicating an f32 held in an S register;
// expanded after register allocation (no encoding of their own).
5193 def VDUPfdf : PseudoNeonI<(outs DPR:$dst), (ins SPR:$src), IIC_VMOVD, "",
5194 [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>;
5195 def VDUPfqf : PseudoNeonI<(outs QPR:$dst), (ins SPR:$src), IIC_VMOVD, "",
5196 [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;
5198 // VMOVN : Vector Narrowing Move
// Plain narrowing selects on the generic trunc node.
5199 defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN,
5200 "vmovn", "i", trunc>;
5201 // VQMOVN : Vector Saturating Narrowing Move
// vqmovun (signed source, unsigned saturated result) is the "su" variant.
5202 defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
5203 "vqmovn", "s", int_arm_neon_vqmovns>;
5204 defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
5205 "vqmovn", "u", int_arm_neon_vqmovnu>;
5206 defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
5207 "vqmovun", "s", int_arm_neon_vqmovnsu>;
5208 // VMOVL : Vector Lengthening Move
5209 defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
5210 defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;
// anyext can be lowered as a zero-extend, so map it onto the VMOVLu forms.
5211 def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>;
5212 def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>;
5213 def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>;
5215 // Vector Conversions.
5217 // VCVT : Vector Convert Between Floating-Point and Integers
// Four conversions (f32->s32, f32->u32, s32->f32, u32->f32), each with a
// D (v2) and a Q (v4) form; they select on the generic fp_to_sint /
// fp_to_uint / sint_to_fp / uint_to_fp nodes.
5218 def VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
5219 v2i32, v2f32, fp_to_sint>;
5220 def VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
5221 v2i32, v2f32, fp_to_uint>;
5222 def VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
5223 v2f32, v2i32, sint_to_fp>;
5224 def VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
5225 v2f32, v2i32, uint_to_fp>;
5227 def VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
5228 v4i32, v4f32, fp_to_sint>;
5229 def VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
5230 v4i32, v4f32, fp_to_uint>;
5231 def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
5232 v4f32, v4i32, sint_to_fp>;
5233 def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
5234 v4f32, v4i32, uint_to_fp>;
5236 // VCVT : Vector Convert Between Floating-Point and Fixed-Point.
5237 let DecoderMethod = "DecodeVCVTD" in {
5238 def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
5239 v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
5240 def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
5241 v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
5242 def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
5243 v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
5244 def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
5245 v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
5248 let DecoderMethod = "DecodeVCVTQ" in {
5249 def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
5250 v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
5251 def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
5252 v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
5253 def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
5254 v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
5255 def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
5256 v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
5259 // VCVT : Vector Convert Between Half-Precision and Single-Precision.
// Narrowing (f32->f16) and lengthening (f16->f32) conversions on the
// half-precision intrinsics; both require the FP16 subtarget feature.
5260 def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
5261 IIC_VUNAQ, "vcvt", "f16.f32",
5262 v4i16, v4f32, int_arm_neon_vcvtfp2hf>,
5263 Requires<[HasNEON, HasFP16]>;
5264 def VCVTh2f : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0,
5265 IIC_VUNAQ, "vcvt", "f32.f16",
5266 v4f32, v4i16, int_arm_neon_vcvthf2fp>,
5267 Requires<[HasNEON, HasFP16]>;
5271 // VREV64 : Vector Reverse elements within 64-bit doublewords
// The three VREV families differ only in the op11_7 field (0b00000 /
// 0b00001 / 0b00010) and in which element sizes they are instantiated
// for; each selects on its NEONvrev* node.
5273 class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
5274 : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd),
5275 (ins DPR:$Vm), IIC_VMOVD,
5276 OpcodeStr, Dt, "$Vd, $Vm", "",
5277 [(set DPR:$Vd, (Ty (NEONvrev64 (Ty DPR:$Vm))))]>;
5278 class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
5279 : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd),
5280 (ins QPR:$Vm), IIC_VMOVQ,
5281 OpcodeStr, Dt, "$Vd, $Vm", "",
5282 [(set QPR:$Vd, (Ty (NEONvrev64 (Ty QPR:$Vm))))]>;
5284 def VREV64d8 : VREV64D<0b00, "vrev64", "8", v8i8>;
5285 def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
5286 def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
// f32 vectors reuse the 32-bit integer forms via explicit patterns.
5287 def : Pat<(v2f32 (NEONvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>;
5289 def VREV64q8 : VREV64Q<0b00, "vrev64", "8", v16i8>;
5290 def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
5291 def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
5292 def : Pat<(v4f32 (NEONvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>;
5294 // VREV32 : Vector Reverse elements within 32-bit words
5296 class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
5297 : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd),
5298 (ins DPR:$Vm), IIC_VMOVD,
5299 OpcodeStr, Dt, "$Vd, $Vm", "",
5300 [(set DPR:$Vd, (Ty (NEONvrev32 (Ty DPR:$Vm))))]>;
5301 class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
5302 : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd),
5303 (ins QPR:$Vm), IIC_VMOVQ,
5304 OpcodeStr, Dt, "$Vd, $Vm", "",
5305 [(set QPR:$Vd, (Ty (NEONvrev32 (Ty QPR:$Vm))))]>;
// Only 8- and 16-bit element forms are defined for VREV32.
5307 def VREV32d8 : VREV32D<0b00, "vrev32", "8", v8i8>;
5308 def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;
5310 def VREV32q8 : VREV32Q<0b00, "vrev32", "8", v16i8>;
5311 def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;
5313 // VREV16 : Vector Reverse elements within 16-bit halfwords
5315 class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
5316 : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd),
5317 (ins DPR:$Vm), IIC_VMOVD,
5318 OpcodeStr, Dt, "$Vd, $Vm", "",
5319 [(set DPR:$Vd, (Ty (NEONvrev16 (Ty DPR:$Vm))))]>;
5320 class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
5321 : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd),
5322 (ins QPR:$Vm), IIC_VMOVQ,
5323 OpcodeStr, Dt, "$Vd, $Vm", "",
5324 [(set QPR:$Vd, (Ty (NEONvrev16 (Ty QPR:$Vm))))]>;
// Only the 8-bit element form is defined for VREV16.
5326 def VREV16d8 : VREV16D<0b00, "vrev16", "8", v8i8>;
5327 def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>;
5329 // Other Vector Shuffles.
5331 // Aligned extractions: really just dropping registers
// Extracting a half-width subvector at an aligned start index is lowered
// to an EXTRACT_SUBREG; the LaneCVT transform maps the start index to the
// appropriate D-subregister index.
5333 class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
5334 : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
5335 (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>;
5337 def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>;
5339 def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>;
5341 def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>;
5343 def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;
5345 def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;
5348 // VEXT : Vector Extract
5351 // All of these have a two-operand InstAlias.
5352 let TwoOperandAliasConstraint = "$Vn = $Vd" in {
5353 class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
5354 : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
5355 (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm,
5356 IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
5357 [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
5358 (Ty DPR:$Vm), imm:$index)))]> {
5360 let Inst{11-8} = index{3-0};
5363 class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
5364 : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
5365 (ins QPR:$Vn, QPR:$Vm, imm0_15:$index), NVExtFrm,
5366 IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
5367 [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
5368 (Ty QPR:$Vm), imm:$index)))]> {
5370 let Inst{11-8} = index{3-0};
5374 def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> {
5375 let Inst{11-8} = index{3-0};
5377 def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
5378 let Inst{11-9} = index{2-0};
5381 def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
5382 let Inst{11-10} = index{1-0};
5383 let Inst{9-8} = 0b00;
5385 def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn),
5388 (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;
5390 def VEXTq8 : VEXTq<"vext", "8", v16i8, imm0_15> {
5391 let Inst{11-8} = index{3-0};
5393 def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
5394 let Inst{11-9} = index{2-0};
5397 def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
5398 let Inst{11-10} = index{1-0};
5399 let Inst{9-8} = 0b00;
5401 def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
5402 let Inst{11} = index{0};
5403 let Inst{10-8} = 0b000;
5405 def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn),
5408 (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;
5410 // VTRN : Vector Transpose
// The three shuffle families share the N2VDShuffle/N2VQShuffle classes
// and differ in the op field (0b00001 / 0b00010 / 0b00011) and data type.
5412 def VTRNd8 : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
5413 def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
5414 def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;
5416 def VTRNq8 : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
5417 def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
5418 def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;
5420 // VUZP : Vector Unzip (Deinterleave)
5422 def VUZPd8 : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
5423 def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
5424 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
5425 def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm",
5426 (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;
5428 def VUZPq8 : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
5429 def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
5430 def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;
5432 // VZIP : Vector Zip (Interleave)
5434 def VZIPd8 : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
5435 def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
5436 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
5437 def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm",
5438 (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;
5440 def VZIPq8 : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
5441 def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
5442 def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;
5444 // Vector Table Lookup and Table Extension.
5446 // VTBL : Vector Table Lookup
5447 let DecoderMethod = "DecodeTBLInstruction" in {
5449 : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
5450 (ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
5451 "vtbl", "8", "$Vd, $Vn, $Vm", "",
5452 [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;
5453 let hasExtraSrcRegAllocReq = 1 in {
5455 : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
5456 (ins VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2,
5457 "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
5459 : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
5460 (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3,
5461 "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
5463 : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
5464 (ins VecListFourD:$Vn, DPR:$Vm),
5466 "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
5467 } // hasExtraSrcRegAllocReq = 1
5470 : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
5472 : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>;
5474 // VTBX : Vector Table Extension
5476 : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd),
5477 (ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
5478 "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd",
5479 [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
5480 DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>;
5481 let hasExtraSrcRegAllocReq = 1 in {
5483 : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
5484 (ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
5485 "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>;
5487 : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
5488 (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm),
5489 NVTBLFrm, IIC_VTBX3,
5490 "vtbx", "8", "$Vd, $Vn, $Vm",
5493 : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd),
5494 (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
5495 "vtbx", "8", "$Vd, $Vn, $Vm",
5497 } // hasExtraSrcRegAllocReq = 1
5500 : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
5501 IIC_VTBX3, "$orig = $dst", []>;
5503 : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
5504 IIC_VTBX4, "$orig = $dst", []>;
5505 } // DecoderMethod = "DecodeTBLInstruction"
5507 //===----------------------------------------------------------------------===//
5508 // NEON instructions for single-precision FP math
5509 //===----------------------------------------------------------------------===//
5511 class N2VSPat<SDNode OpNode, NeonI Inst>
5512 : NEONFPPat<(f32 (OpNode SPR:$a)),
5514 (v2f32 (COPY_TO_REGCLASS (Inst
5516 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
5517 SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>;
5519 class N3VSPat<SDNode OpNode, NeonI Inst>
5520 : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
5522 (v2f32 (COPY_TO_REGCLASS (Inst
5524 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
5527 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
5528 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
5530 class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
5531 : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
5533 (v2f32 (COPY_TO_REGCLASS (Inst
5535 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
5538 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
5541 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
5542 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
// Map scalar f32 operations onto the D-register NEON instructions
// (the N2VSPat/N3VSPat/N3VSMulOpPat classes insert/extract via ssub_0).
5544 def : N3VSPat<fadd, VADDfd>;
5545 def : N3VSPat<fsub, VSUBfd>;
5546 def : N3VSPat<fmul, VMULfd>;
// Non-fused multiply-accumulate: selected only when fused MAC is not in use.
5547 def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
5548 Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>;
5549 def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
5550 Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>;
// Fused multiply-accumulate: requires VFP4 and the UseFusedMAC predicate.
5551 def : N3VSMulOpPat<fmul, fadd, VFMAfd>,
5552 Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
5553 def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
5554 Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
5555 def : N2VSPat<fabs, VABSfd>;
5556 def : N2VSPat<fneg, VNEGfd>;
5557 def : N3VSPat<NEONfmax, VMAXfd>;
5558 def : N3VSPat<NEONfmin, VMINfd>;
// Scalar int<->float conversions via the NEON D-register convert instructions.
5559 def : N2VSPat<arm_ftosi, VCVTf2sd>;
5560 def : N2VSPat<arm_ftoui, VCVTf2ud>;
5561 def : N2VSPat<arm_sitof, VCVTs2fd>;
5562 def : N2VSPat<arm_uitof, VCVTu2fd>;
5564 //===----------------------------------------------------------------------===//
5565 // Non-Instruction Patterns
5566 //===----------------------------------------------------------------------===//
// Bit conversions between the 64-bit (D-register) vector types are no-ops:
// every pattern below selects the source register unchanged, only retyped.
5569 def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
5570 def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
5571 def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (v1i64 DPR:$src)>;
5572 def : Pat<(v1i64 (bitconvert (f64 DPR:$src))), (v1i64 DPR:$src)>;
5573 def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
5574 def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
5575 def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
5576 def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (v2i32 DPR:$src)>;
5577 def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (v2i32 DPR:$src)>;
5578 def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
5579 def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
5580 def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
5581 def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (v4i16 DPR:$src)>;
5582 def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (v4i16 DPR:$src)>;
5583 def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
5584 def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (v8i8 DPR:$src)>;
5585 def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>;
5586 def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 DPR:$src)>;
5587 def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (v8i8 DPR:$src)>;
5588 def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>;
5589 def : Pat<(f64 (bitconvert (v1i64 DPR:$src))), (f64 DPR:$src)>;
5590 def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>;
5591 def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>;
5592 def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>;
5593 def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>;
5594 def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>;
5595 def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
5596 def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
5597 def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
5598 def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (v2f32 DPR:$src)>;
// Likewise, bit conversions between the 128-bit (Q-register) vector types
// are register-reinterpreting no-ops.
5600 def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
5601 def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
5602 def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
5603 def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
5604 def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
5605 def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
5606 def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
5607 def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
5608 def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
5609 def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
5610 def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
5611 def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
5612 def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
5613 def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
5614 def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
5615 def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
5616 def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
5617 def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
5618 def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
5619 def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
5620 def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
5621 def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
5622 def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
5623 def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
5624 def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
5625 def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
5626 def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
5627 def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
5628 def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
5629 def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
5631 // Vector lengthening move with load, matching extending loads.
5633 // extload, zextload and sextload for a standard lengthening load. Example:
5634 // Lengthen_Single<"8", "i16", "8"> =
5635 // Pat<(v8i16 (extloadvi8 addrmode6:$addr))
5636 // (VMOVLuv8i16 (VLD1d8 addrmode6:$addr,
5637 // (f64 (IMPLICIT_DEF)), (i32 0)))>;
5638 multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> {
5639 let AddedComplexity = 10 in {
5640 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
5641 (!cast<PatFrag>("extloadvi" # SrcTy) addrmode6:$addr)),
5642 (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
5643 (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;
5645 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
5646 (!cast<PatFrag>("zextloadvi" # SrcTy) addrmode6:$addr)),
5647 (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
5648 (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;
5650 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
5651 (!cast<PatFrag>("sextloadvi" # SrcTy) addrmode6:$addr)),
5652 (!cast<Instruction>("VMOVLsv" # DestLanes # DestTy)
5653 (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;
5657 // extload, zextload and sextload for a lengthening load which only uses
5658 // half the lanes available. Example:
5659 // Lengthen_HalfSingle<"4", "i16", "8", "i16", "i8"> =
5660 // Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)),
5661 // (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
5662 // (f64 (IMPLICIT_DEF)), (i32 0))),
5664 multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
5665 string InsnLanes, string InsnTy> {
5666 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
5667 (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
5668 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
5669 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
5671 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
5672 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
5673 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
5674 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
5676 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
5677 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
5678 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
5679 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
5683 // extload, zextload and sextload for a lengthening load followed by another
5684 // lengthening load, to quadruple the initial length.
5686 // Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32"> =
5687 // Pat<(v4i32 (extloadvi8 addrmode6oneL32:$addr))
5688 // (EXTRACT_SUBREG (VMOVLuv4i32
5689 // (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
5690 // (f64 (IMPLICIT_DEF)),
5694 multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
5695 string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
5697 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
5698 (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
5699 (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
5700 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
5701 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
5703 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
5704 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
5705 (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
5706 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
5707 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
5709 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
5710 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
5711 (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
5712 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
5713 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
5717 // extload, zextload and sextload for a lengthening load followed by another
5718 // lengthening load, to quadruple the initial length, but which ends up only
5719 // requiring half the available lanes (a 64-bit outcome instead of a 128-bit).
5721 // Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> =
5722 // Pat<(v2i32 (extloadvi8 addrmode6:$addr))
5723 // (EXTRACT_SUBREG (VMOVLuv4i32
5724 // (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd16 addrmode6:$addr,
5725 // (f64 (IMPLICIT_DEF)), (i32 0))),
5728 multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy,
5729 string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
5731 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
5732 (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
5733 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
5734 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
5735 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
5738 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
5739 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
5740 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
5741 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
5742 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
5745 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
5746 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
5747 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
5748 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
5749 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
// Instantiate the lengthening-load multiclasses (each defm expands to the
// _Any/_Z/_S variants for extload/zextload/sextload).
5754 defm : Lengthen_Single<"8", "i16", "8">; // v8i8 -> v8i16
5755 defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32
5756 defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64
5758 defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16
5759 defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32
5761 // Double lengthening - v4i8 -> v4i16 -> v4i32
5762 defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">;
5763 // v2i8 -> v2i16 -> v2i32
5764 defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">;
5765 // v2i16 -> v2i32 -> v2i64
5766 defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;
5768 // Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64, spelled out directly
// for each of the three extension kinds (any/zero/sign). Each loads a single
// 16-bit lane, then widens three times, extracting the low D subreg between
// the first two widening steps.
5769 def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
5770 (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
5771 (VLD1LNd16 addrmode6:$addr,
5772 (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
5773 def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
5774 (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
5775 (VLD1LNd16 addrmode6:$addr,
5776 (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
// Sign extension uses the signed widening moves (VMOVLs*).
5777 def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
5778 (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
5779 (VLD1LNd16 addrmode6:$addr,
5780 (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
5782 //===----------------------------------------------------------------------===//
5783 // Assembler aliases
// Legacy VFP2 mnemonics: fmdhr/fmdlr move a GPR into the high (lane 1) or
// low (lane 0) half of a D register, expressed as VSETLNi32 aliases.
5786 def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn",
5787 (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>;
5788 def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn",
5789 (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>;
5791 // VAND/VBIC/VEOR/VORR accept but do not require a type suffix.
// Three-operand spellings, D- and Q-register forms of each.
5792 defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
5793 (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
5794 defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
5795 (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
5796 defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
5797 (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
5798 defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
5799 (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
5800 defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
5801 (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
5802 defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
5803 (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
5804 defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
5805 (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
5806 defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
5807 (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
5808 // ... two-operand aliases
// NOTE(review): two-operand ($Vdn, $Vm) aliases are defined for vand/veor/vorr
// but not vbic — confirm whether the vbic omission is intentional.
5809 defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
5810 (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
5811 defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
5812 (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
5813 defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
5814 (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
5815 defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
5816 (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
5817 defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
5818 (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
5819 defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
5820 (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
5822 // VLD1 single-lane pseudo-instructions. These need special handling for
5823 // the lane index that an InstAlias can't handle, so we use these instead.
// Three addressing forms per element size: plain ($addr), fixed writeback
// ($addr!), and register writeback ($addr, $Rm).
5824 def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr",
5825 (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
5826 def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr",
5827 (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
5828 def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr",
5829 (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
5831 def VLD1LNdWB_fixed_Asm_8 :
5832 NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!",
5833 (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
5834 def VLD1LNdWB_fixed_Asm_16 :
5835 NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!",
5836 (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
5837 def VLD1LNdWB_fixed_Asm_32 :
5838 NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!",
5839 (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
5840 def VLD1LNdWB_register_Asm_8 :
5841 NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm",
5842 (ins VecListOneDByteIndexed:$list, addrmode6:$addr,
5843 rGPR:$Rm, pred:$p)>;
5844 def VLD1LNdWB_register_Asm_16 :
5845 NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm",
5846 (ins VecListOneDHWordIndexed:$list, addrmode6:$addr,
5847 rGPR:$Rm, pred:$p)>;
5848 def VLD1LNdWB_register_Asm_32 :
5849 NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm",
5850 (ins VecListOneDWordIndexed:$list, addrmode6:$addr,
5851 rGPR:$Rm, pred:$p)>;
5854 // VST1 single-lane pseudo-instructions. These need special handling for
5855 // the lane index that an InstAlias can't handle, so we use these instead.
// Store-side mirror of the VLD1 lane pseudos above: same three addressing
// forms (plain, fixed writeback, register writeback) per element size.
5856 def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr",
5857 (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
5858 def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr",
5859 (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
5860 def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr",
5861 (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
5863 def VST1LNdWB_fixed_Asm_8 :
5864 NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!",
5865 (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
5866 def VST1LNdWB_fixed_Asm_16 :
5867 NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!",
5868 (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
5869 def VST1LNdWB_fixed_Asm_32 :
5870 NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!",
5871 (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
5872 def VST1LNdWB_register_Asm_8 :
5873 NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm",
5874 (ins VecListOneDByteIndexed:$list, addrmode6:$addr,
5875 rGPR:$Rm, pred:$p)>;
5876 def VST1LNdWB_register_Asm_16 :
5877 NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm",
5878 (ins VecListOneDHWordIndexed:$list, addrmode6:$addr,
5879 rGPR:$Rm, pred:$p)>;
5880 def VST1LNdWB_register_Asm_32 :
5881 NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm",
5882 (ins VecListOneDWordIndexed:$list, addrmode6:$addr,
5883 rGPR:$Rm, pred:$p)>;
5885 // VLD2 single-lane pseudo-instructions. These need special handling for
5886 // the lane index that an InstAlias can't handle, so we use these instead.
// d-suffix variants use two consecutive D registers (8/16/32-bit elements);
// q-suffix variants use even/odd-spaced registers and exist only for 16/32.
5887 def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr",
5888 (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
5889 def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
5890 (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
5891 def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
5892 (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
5893 def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
5894 (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
5895 def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
5896 (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Fixed writeback ($addr!) forms.
5898 def VLD2LNdWB_fixed_Asm_8 :
5899 NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!",
5900 (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
5901 def VLD2LNdWB_fixed_Asm_16 :
5902 NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
5903 (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
5904 def VLD2LNdWB_fixed_Asm_32 :
5905 NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
5906 (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
5907 def VLD2LNqWB_fixed_Asm_16 :
5908 NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
5909 (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
5910 def VLD2LNqWB_fixed_Asm_32 :
5911 NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
5912 (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Register writeback ($addr, $Rm) forms.
5913 def VLD2LNdWB_register_Asm_8 :
5914 NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm",
5915 (ins VecListTwoDByteIndexed:$list, addrmode6:$addr,
5916 rGPR:$Rm, pred:$p)>;
5917 def VLD2LNdWB_register_Asm_16 :
5918 NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
5919 (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr,
5920 rGPR:$Rm, pred:$p)>;
5921 def VLD2LNdWB_register_Asm_32 :
5922 NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
5923 (ins VecListTwoDWordIndexed:$list, addrmode6:$addr,
5924 rGPR:$Rm, pred:$p)>;
5925 def VLD2LNqWB_register_Asm_16 :
5926 NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
5927 (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr,
5928 rGPR:$Rm, pred:$p)>;
5929 def VLD2LNqWB_register_Asm_32 :
5930 NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
5931 (ins VecListTwoQWordIndexed:$list, addrmode6:$addr,
5932 rGPR:$Rm, pred:$p)>;
5935 // VST2 single-lane pseudo-instructions. These need special handling for
5936 // the lane index that an InstAlias can't handle, so we use these instead.
// Store-side mirror of the VLD2 lane pseudos: d-suffix variants for
// 8/16/32-bit elements, q-suffix variants only for 16/32, each with plain,
// fixed-writeback ($addr!) and register-writeback ($addr, $Rm) forms.
5937 def VST2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr",
5938 (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
5939 def VST2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
5940 (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
5941 def VST2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
5942 (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
5943 def VST2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
5944 (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
5945 def VST2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
5946 (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
5948 def VST2LNdWB_fixed_Asm_8 :
5949 NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!",
5950 (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
5951 def VST2LNdWB_fixed_Asm_16 :
5952 NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
5953 (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
5954 def VST2LNdWB_fixed_Asm_32 :
5955 NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
5956 (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
5957 def VST2LNqWB_fixed_Asm_16 :
5958 NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
5959 (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
5960 def VST2LNqWB_fixed_Asm_32 :
5961 NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
5962 (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
5963 def VST2LNdWB_register_Asm_8 :
5964 NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm",
5965 (ins VecListTwoDByteIndexed:$list, addrmode6:$addr,
5966 rGPR:$Rm, pred:$p)>;
5967 def VST2LNdWB_register_Asm_16 :
5968 NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr, $Rm",
5969 (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr,
5970 rGPR:$Rm, pred:$p)>;
5971 def VST2LNdWB_register_Asm_32 :
5972 NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
5973 (ins VecListTwoDWordIndexed:$list, addrmode6:$addr,
5974 rGPR:$Rm, pred:$p)>;
5975 def VST2LNqWB_register_Asm_16 :
5976 NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr, $Rm",
5977 (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr,
5978 rGPR:$Rm, pred:$p)>;
5979 def VST2LNqWB_register_Asm_32 :
5980 NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
5981 (ins VecListTwoQWordIndexed:$list, addrmode6:$addr,
5982 rGPR:$Rm, pred:$p)>;
5984 // VLD3 all-lanes pseudo-instructions. These need special handling for
5985 // the lane index that an InstAlias can't handle, so we use these instead.
// d-suffix variants take three consecutive D registers, q-suffix variants
// every-other D register; all three element sizes exist for both.
5986 def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
5987 (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
5988 def VLD3DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
5989 (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
5990 def VLD3DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
5991 (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
5992 def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
5993 (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
5994 def VLD3DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
5995 (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
5996 def VLD3DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
5997 (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
// Fixed writeback ($addr!) forms.
5999 def VLD3DUPdWB_fixed_Asm_8 :
6000 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
6001 (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
6002 def VLD3DUPdWB_fixed_Asm_16 :
6003 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
6004 (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
6005 def VLD3DUPdWB_fixed_Asm_32 :
6006 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
6007 (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
6008 def VLD3DUPqWB_fixed_Asm_8 :
6009 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
6010 (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
6011 def VLD3DUPqWB_fixed_Asm_16 :
6012 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
6013 (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
6014 def VLD3DUPqWB_fixed_Asm_32 :
6015 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
6016 (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
// Register writeback ($addr, $Rm) forms.
6017 def VLD3DUPdWB_register_Asm_8 :
6018 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
6019 (ins VecListThreeDAllLanes:$list, addrmode6:$addr,
6020 rGPR:$Rm, pred:$p)>;
6021 def VLD3DUPdWB_register_Asm_16 :
6022 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
6023 (ins VecListThreeDAllLanes:$list, addrmode6:$addr,
6024 rGPR:$Rm, pred:$p)>;
6025 def VLD3DUPdWB_register_Asm_32 :
6026 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
6027 (ins VecListThreeDAllLanes:$list, addrmode6:$addr,
6028 rGPR:$Rm, pred:$p)>;
6029 def VLD3DUPqWB_register_Asm_8 :
6030 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
6031 (ins VecListThreeQAllLanes:$list, addrmode6:$addr,
6032 rGPR:$Rm, pred:$p)>;
6033 def VLD3DUPqWB_register_Asm_16 :
6034 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
6035 (ins VecListThreeQAllLanes:$list, addrmode6:$addr,
6036 rGPR:$Rm, pred:$p)>;
6037 def VLD3DUPqWB_register_Asm_32 :
6038 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
6039 (ins VecListThreeQAllLanes:$list, addrmode6:$addr,
6040 rGPR:$Rm, pred:$p)>;
6043 // VLD3 single-lane pseudo-instructions. These need special handling for
6044 // the lane index that an InstAlias can't handle, so we use these instead.
// As with VLD2 lanes: d-suffix variants for 8/16/32-bit elements, q-suffix
// variants only for 16/32-bit elements.
6045 def VLD3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
6046 (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
6047 def VLD3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
6048 (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
6049 def VLD3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
6050 (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
6051 def VLD3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
6052 (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
6053 def VLD3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
6054 (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Fixed writeback ($addr!) forms.
6056 def VLD3LNdWB_fixed_Asm_8 :
6057 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
6058 (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
6059 def VLD3LNdWB_fixed_Asm_16 :
6060 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
6061 (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
6062 def VLD3LNdWB_fixed_Asm_32 :
6063 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
6064 (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
6065 def VLD3LNqWB_fixed_Asm_16 :
6066 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
6067 (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
6068 def VLD3LNqWB_fixed_Asm_32 :
6069 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
6070 (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Register writeback ($addr, $Rm) forms.
6071 def VLD3LNdWB_register_Asm_8 :
6072 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
6073 (ins VecListThreeDByteIndexed:$list, addrmode6:$addr,
6074 rGPR:$Rm, pred:$p)>;
6075 def VLD3LNdWB_register_Asm_16 :
6076 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
6077 (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr,
6078 rGPR:$Rm, pred:$p)>;
6079 def VLD3LNdWB_register_Asm_32 :
6080 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
6081 (ins VecListThreeDWordIndexed:$list, addrmode6:$addr,
6082 rGPR:$Rm, pred:$p)>;
6083 def VLD3LNqWB_register_Asm_16 :
6084 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
6085 (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr,
6086 rGPR:$Rm, pred:$p)>;
6087 def VLD3LNqWB_register_Asm_32 :
6088 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
6089 (ins VecListThreeQWordIndexed:$list, addrmode6:$addr,
6090 rGPR:$Rm, pred:$p)>;
6092 // VLD3 multiple structure pseudo-instructions. These need special handling for
6093 // the vector operands that the normal instructions don't yet model.
6094 // FIXME: Remove these when the register classes and instructions are updated.
6095 def VLD3dAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
6096 (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
6097 def VLD3dAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
6098 (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
6099 def VLD3dAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
6100 (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
6101 def VLD3qAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
6102 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
6103 def VLD3qAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
6104 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
6105 def VLD3qAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
6106 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
// Writeback variants, fixed increment ("$addr!").
6108 def VLD3dWB_fixed_Asm_8 :
6109 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
6110 (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
6111 def VLD3dWB_fixed_Asm_16 :
6112 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
6113 (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
6114 def VLD3dWB_fixed_Asm_32 :
6115 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
6116 (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
6117 def VLD3qWB_fixed_Asm_8 :
6118 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
6119 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
6120 def VLD3qWB_fixed_Asm_16 :
6121 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
6122 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
6123 def VLD3qWB_fixed_Asm_32 :
6124 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
6125 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
// Writeback variants, register increment ("$addr, $Rm").
6126 def VLD3dWB_register_Asm_8 :
6127 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
6128 (ins VecListThreeD:$list, addrmode6:$addr,
6129 rGPR:$Rm, pred:$p)>;
6130 def VLD3dWB_register_Asm_16 :
6131 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
6132 (ins VecListThreeD:$list, addrmode6:$addr,
6133 rGPR:$Rm, pred:$p)>;
6134 def VLD3dWB_register_Asm_32 :
6135 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
6136 (ins VecListThreeD:$list, addrmode6:$addr,
6137 rGPR:$Rm, pred:$p)>;
6138 def VLD3qWB_register_Asm_8 :
6139 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
6140 (ins VecListThreeQ:$list, addrmode6:$addr,
6141 rGPR:$Rm, pred:$p)>;
6142 def VLD3qWB_register_Asm_16 :
6143 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
6144 (ins VecListThreeQ:$list, addrmode6:$addr,
6145 rGPR:$Rm, pred:$p)>;
6146 def VLD3qWB_register_Asm_32 :
6147 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
6148 (ins VecListThreeQ:$list, addrmode6:$addr,
6149 rGPR:$Rm, pred:$p)>;
6151 // VST3 single-lane pseudo-instructions. These need special handling for
6152 // the lane index that an InstAlias can't handle, so we use these instead.
6153 def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
6154 (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
6155 def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
6156 (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
6157 def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
6158 (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
6159 def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
6160 (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
6161 def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
6162 (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Writeback variants, fixed increment ("$addr!").
6164 def VST3LNdWB_fixed_Asm_8 :
6165 NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
6166 (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
6167 def VST3LNdWB_fixed_Asm_16 :
6168 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
6169 (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
6170 def VST3LNdWB_fixed_Asm_32 :
6171 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
6172 (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
6173 def VST3LNqWB_fixed_Asm_16 :
6174 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
6175 (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
6176 def VST3LNqWB_fixed_Asm_32 :
6177 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
6178 (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Writeback variants, register increment ("$addr, $Rm").
6179 def VST3LNdWB_register_Asm_8 :
6180 NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
6181 (ins VecListThreeDByteIndexed:$list, addrmode6:$addr,
6182 rGPR:$Rm, pred:$p)>;
6183 def VST3LNdWB_register_Asm_16 :
6184 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
6185 (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr,
6186 rGPR:$Rm, pred:$p)>;
6187 def VST3LNdWB_register_Asm_32 :
6188 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
6189 (ins VecListThreeDWordIndexed:$list, addrmode6:$addr,
6190 rGPR:$Rm, pred:$p)>;
6191 def VST3LNqWB_register_Asm_16 :
6192 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
6193 (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr,
6194 rGPR:$Rm, pred:$p)>;
6195 def VST3LNqWB_register_Asm_32 :
6196 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
6197 (ins VecListThreeQWordIndexed:$list, addrmode6:$addr,
6198 rGPR:$Rm, pred:$p)>;
6201 // VST3 multiple structure pseudo-instructions. These need special handling for
6202 // the vector operands that the normal instructions don't yet model.
6203 // FIXME: Remove these when the register classes and instructions are updated.
6204 def VST3dAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
6205 (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
6206 def VST3dAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
6207 (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
6208 def VST3dAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
6209 (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
6210 def VST3qAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
6211 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
6212 def VST3qAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
6213 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
6214 def VST3qAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
6215 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
// Writeback variants, fixed increment ("$addr!").
6217 def VST3dWB_fixed_Asm_8 :
6218 NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
6219 (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
6220 def VST3dWB_fixed_Asm_16 :
6221 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
6222 (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
6223 def VST3dWB_fixed_Asm_32 :
6224 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
6225 (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
6226 def VST3qWB_fixed_Asm_8 :
6227 NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
6228 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
6229 def VST3qWB_fixed_Asm_16 :
6230 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
6231 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
6232 def VST3qWB_fixed_Asm_32 :
6233 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
6234 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
// Writeback variants, register increment ("$addr, $Rm").
6235 def VST3dWB_register_Asm_8 :
6236 NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
6237 (ins VecListThreeD:$list, addrmode6:$addr,
6238 rGPR:$Rm, pred:$p)>;
6239 def VST3dWB_register_Asm_16 :
6240 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
6241 (ins VecListThreeD:$list, addrmode6:$addr,
6242 rGPR:$Rm, pred:$p)>;
6243 def VST3dWB_register_Asm_32 :
6244 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
6245 (ins VecListThreeD:$list, addrmode6:$addr,
6246 rGPR:$Rm, pred:$p)>;
6247 def VST3qWB_register_Asm_8 :
6248 NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
6249 (ins VecListThreeQ:$list, addrmode6:$addr,
6250 rGPR:$Rm, pred:$p)>;
6251 def VST3qWB_register_Asm_16 :
6252 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
6253 (ins VecListThreeQ:$list, addrmode6:$addr,
6254 rGPR:$Rm, pred:$p)>;
6255 def VST3qWB_register_Asm_32 :
6256 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
6257 (ins VecListThreeQ:$list, addrmode6:$addr,
6258 rGPR:$Rm, pred:$p)>;
6260 // VLD4 all-lanes pseudo-instructions. These need special handling for
// the all-lanes vector-list operand (VecListFourDAllLanes / VecListFourQAllLanes)
6261 // the lane index that an InstAlias can't handle, so we use these instead.
6262 def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
6263 (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
6264 def VLD4DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
6265 (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
6266 def VLD4DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
6267 (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
6268 def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
6269 (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
6270 def VLD4DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
6271 (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
6272 def VLD4DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
6273 (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
// Writeback variants, fixed increment ("$addr!").
6275 def VLD4DUPdWB_fixed_Asm_8 :
6276 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
6277 (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
6278 def VLD4DUPdWB_fixed_Asm_16 :
6279 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
6280 (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
6281 def VLD4DUPdWB_fixed_Asm_32 :
6282 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
6283 (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
6284 def VLD4DUPqWB_fixed_Asm_8 :
6285 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
6286 (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
6287 def VLD4DUPqWB_fixed_Asm_16 :
6288 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
6289 (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
6290 def VLD4DUPqWB_fixed_Asm_32 :
6291 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
6292 (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
// Writeback variants, register increment ("$addr, $Rm").
6293 def VLD4DUPdWB_register_Asm_8 :
6294 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
6295 (ins VecListFourDAllLanes:$list, addrmode6:$addr,
6296 rGPR:$Rm, pred:$p)>;
6297 def VLD4DUPdWB_register_Asm_16 :
6298 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
6299 (ins VecListFourDAllLanes:$list, addrmode6:$addr,
6300 rGPR:$Rm, pred:$p)>;
6301 def VLD4DUPdWB_register_Asm_32 :
6302 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
6303 (ins VecListFourDAllLanes:$list, addrmode6:$addr,
6304 rGPR:$Rm, pred:$p)>;
6305 def VLD4DUPqWB_register_Asm_8 :
6306 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
6307 (ins VecListFourQAllLanes:$list, addrmode6:$addr,
6308 rGPR:$Rm, pred:$p)>;
6309 def VLD4DUPqWB_register_Asm_16 :
6310 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
6311 (ins VecListFourQAllLanes:$list, addrmode6:$addr,
6312 rGPR:$Rm, pred:$p)>;
6313 def VLD4DUPqWB_register_Asm_32 :
6314 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
6315 (ins VecListFourQAllLanes:$list, addrmode6:$addr,
6316 rGPR:$Rm, pred:$p)>;
6319 // VLD4 single-lane pseudo-instructions. These need special handling for
6320 // the lane index that an InstAlias can't handle, so we use these instead.
6321 def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
6322 (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
6323 def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
6324 (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
6325 def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
6326 (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
6327 def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
6328 (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
6329 def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
6330 (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Writeback variants, fixed increment ("$addr!").
6332 def VLD4LNdWB_fixed_Asm_8 :
6333 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
6334 (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
6335 def VLD4LNdWB_fixed_Asm_16 :
6336 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
6337 (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
6338 def VLD4LNdWB_fixed_Asm_32 :
6339 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
6340 (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
6341 def VLD4LNqWB_fixed_Asm_16 :
6342 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
6343 (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
6344 def VLD4LNqWB_fixed_Asm_32 :
6345 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
6346 (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Writeback variants, register increment ("$addr, $Rm").
6347 def VLD4LNdWB_register_Asm_8 :
6348 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
6349 (ins VecListFourDByteIndexed:$list, addrmode6:$addr,
6350 rGPR:$Rm, pred:$p)>;
6351 def VLD4LNdWB_register_Asm_16 :
6352 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
6353 (ins VecListFourDHWordIndexed:$list, addrmode6:$addr,
6354 rGPR:$Rm, pred:$p)>;
6355 def VLD4LNdWB_register_Asm_32 :
6356 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
6357 (ins VecListFourDWordIndexed:$list, addrmode6:$addr,
6358 rGPR:$Rm, pred:$p)>;
6359 def VLD4LNqWB_register_Asm_16 :
6360 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
6361 (ins VecListFourQHWordIndexed:$list, addrmode6:$addr,
6362 rGPR:$Rm, pred:$p)>;
6363 def VLD4LNqWB_register_Asm_32 :
6364 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
6365 (ins VecListFourQWordIndexed:$list, addrmode6:$addr,
6366 rGPR:$Rm, pred:$p)>;
6370 // VLD4 multiple structure pseudo-instructions. These need special handling for
6371 // the vector operands that the normal instructions don't yet model.
6372 // FIXME: Remove these when the register classes and instructions are updated.
6373 def VLD4dAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
6374 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
6375 def VLD4dAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
6376 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
6377 def VLD4dAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
6378 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
6379 def VLD4qAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
6380 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
6381 def VLD4qAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
6382 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
6383 def VLD4qAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
6384 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
// Writeback variants, fixed increment ("$addr!").
6386 def VLD4dWB_fixed_Asm_8 :
6387 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
6388 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
6389 def VLD4dWB_fixed_Asm_16 :
6390 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
6391 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
6392 def VLD4dWB_fixed_Asm_32 :
6393 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
6394 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
6395 def VLD4qWB_fixed_Asm_8 :
6396 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
6397 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
6398 def VLD4qWB_fixed_Asm_16 :
6399 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
6400 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
6401 def VLD4qWB_fixed_Asm_32 :
6402 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
6403 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
// Writeback variants, register increment ("$addr, $Rm").
6404 def VLD4dWB_register_Asm_8 :
6405 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
6406 (ins VecListFourD:$list, addrmode6:$addr,
6407 rGPR:$Rm, pred:$p)>;
6408 def VLD4dWB_register_Asm_16 :
6409 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
6410 (ins VecListFourD:$list, addrmode6:$addr,
6411 rGPR:$Rm, pred:$p)>;
6412 def VLD4dWB_register_Asm_32 :
6413 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
6414 (ins VecListFourD:$list, addrmode6:$addr,
6415 rGPR:$Rm, pred:$p)>;
6416 def VLD4qWB_register_Asm_8 :
6417 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
6418 (ins VecListFourQ:$list, addrmode6:$addr,
6419 rGPR:$Rm, pred:$p)>;
6420 def VLD4qWB_register_Asm_16 :
6421 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
6422 (ins VecListFourQ:$list, addrmode6:$addr,
6423 rGPR:$Rm, pred:$p)>;
6424 def VLD4qWB_register_Asm_32 :
6425 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
6426 (ins VecListFourQ:$list, addrmode6:$addr,
6427 rGPR:$Rm, pred:$p)>;
6429 // VST4 single-lane pseudo-instructions. These need special handling for
6430 // the lane index that an InstAlias can't handle, so we use these instead.
6431 def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
6432 (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
6433 def VST4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
6434 (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
6435 def VST4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
6436 (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
6437 def VST4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
6438 (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
6439 def VST4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
6440 (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Writeback variants, fixed increment ("$addr!").
6442 def VST4LNdWB_fixed_Asm_8 :
6443 NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
6444 (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
6445 def VST4LNdWB_fixed_Asm_16 :
6446 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
6447 (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
6448 def VST4LNdWB_fixed_Asm_32 :
6449 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
6450 (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
6451 def VST4LNqWB_fixed_Asm_16 :
6452 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
6453 (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
6454 def VST4LNqWB_fixed_Asm_32 :
6455 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
6456 (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Writeback variants, register increment ("$addr, $Rm").
6457 def VST4LNdWB_register_Asm_8 :
6458 NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
6459 (ins VecListFourDByteIndexed:$list, addrmode6:$addr,
6460 rGPR:$Rm, pred:$p)>;
6461 def VST4LNdWB_register_Asm_16 :
6462 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
6463 (ins VecListFourDHWordIndexed:$list, addrmode6:$addr,
6464 rGPR:$Rm, pred:$p)>;
6465 def VST4LNdWB_register_Asm_32 :
6466 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
6467 (ins VecListFourDWordIndexed:$list, addrmode6:$addr,
6468 rGPR:$Rm, pred:$p)>;
6469 def VST4LNqWB_register_Asm_16 :
6470 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
6471 (ins VecListFourQHWordIndexed:$list, addrmode6:$addr,
6472 rGPR:$Rm, pred:$p)>;
6473 def VST4LNqWB_register_Asm_32 :
6474 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
6475 (ins VecListFourQWordIndexed:$list, addrmode6:$addr,
6476 rGPR:$Rm, pred:$p)>;
6479 // VST4 multiple structure pseudo-instructions. These need special handling for
6480 // the vector operands that the normal instructions don't yet model.
6481 // FIXME: Remove these when the register classes and instructions are updated.
6482 def VST4dAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
6483 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
6484 def VST4dAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
6485 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
6486 def VST4dAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
6487 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
6488 def VST4qAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
6489 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
6490 def VST4qAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
6491 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
6492 def VST4qAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
6493 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
// Writeback variants, fixed increment ("$addr!").
6495 def VST4dWB_fixed_Asm_8 :
6496 NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
6497 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
6498 def VST4dWB_fixed_Asm_16 :
6499 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
6500 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
6501 def VST4dWB_fixed_Asm_32 :
6502 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
6503 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
6504 def VST4qWB_fixed_Asm_8 :
6505 NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
6506 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
6507 def VST4qWB_fixed_Asm_16 :
6508 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
6509 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
6510 def VST4qWB_fixed_Asm_32 :
6511 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
6512 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
// Writeback variants, register increment ("$addr, $Rm").
6513 def VST4dWB_register_Asm_8 :
6514 NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
6515 (ins VecListFourD:$list, addrmode6:$addr,
6516 rGPR:$Rm, pred:$p)>;
6517 def VST4dWB_register_Asm_16 :
6518 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
6519 (ins VecListFourD:$list, addrmode6:$addr,
6520 rGPR:$Rm, pred:$p)>;
6521 def VST4dWB_register_Asm_32 :
6522 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
6523 (ins VecListFourD:$list, addrmode6:$addr,
6524 rGPR:$Rm, pred:$p)>;
6525 def VST4qWB_register_Asm_8 :
6526 NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
6527 (ins VecListFourQ:$list, addrmode6:$addr,
6528 rGPR:$Rm, pred:$p)>;
6529 def VST4qWB_register_Asm_16 :
6530 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
6531 (ins VecListFourQ:$list, addrmode6:$addr,
6532 rGPR:$Rm, pred:$p)>;
6533 def VST4qWB_register_Asm_32 :
6534 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
6535 (ins VecListFourQ:$list, addrmode6:$addr,
6536 rGPR:$Rm, pred:$p)>;
6538 // VMOV takes an optional datatype suffix
// Register-to-register vmov is emitted as vorr with both source operands the
// same register ($Vm, $Vm).
6539 defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
6540 (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
6541 defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
6542 (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
6544 // VCLE (register) is an assembler alias for VCGE w/ the operands reversed.
6545 // D-register versions.
// Note the $Dm/$Dn (and $Qm/$Qn) swap in the result instruction:
// "a <= b" is implemented as "b >= a".
6546 def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm",
6547 (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
6548 def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm",
6549 (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
6550 def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm",
6551 (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
6552 def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm",
6553 (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
6554 def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm",
6555 (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
6556 def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm",
6557 (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
6558 def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm",
6559 (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
6560 // Q-register versions.
6561 def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm",
6562 (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
6563 def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm",
6564 (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
6565 def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm",
6566 (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
6567 def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm",
6568 (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
6569 def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm",
6570 (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
6571 def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm",
6572 (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
6573 def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm",
6574 (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
6576 // VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
// "a < b" is implemented as "b > a", hence the $Dm/$Dn ($Qm/$Qn) swap below.
6577 // D-register versions.
6578 def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm",
6579 (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
6580 def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm",
6581 (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
6582 def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm",
6583 (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
6584 def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm",
6585 (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
6586 def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm",
6587 (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
6588 def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm",
6589 (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
6590 def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm",
6591 (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
6592 // Q-register versions.
6593 def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm",
6594 (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
6595 def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm",
6596 (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
6597 def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm",
6598 (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
6599 def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm",
6600 (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
6601 def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm",
6602 (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
6603 def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm",
6604 (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
6605 def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm",
6606 (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
6608 // VSWP allows, but does not require, a type suffix.
// D-register version, then Q-register version.
6609 defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
6610 (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>;
6611 defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
6612 (VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>;
6614 // VBIF, VBIT, and VBSL allow, but do not require, a type suffix.
// D-register versions.
6615 defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
6616 (VBIFd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
6617 defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
6618 (VBITd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
6619 defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
6620 (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
// Q-register versions.
6621 defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
6622 (VBIFq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
6623 defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
6624 (VBITq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
6625 defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
6626 (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
6628 // "vmov Rd, #-imm" can be handled via "vmvn".
6629 def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
6630 (VMVNv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
6631 def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
6632 (VMVNv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
// ... and symmetrically, "vmvn Rd, #-imm" can be handled via "vmov".
6633 def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
6634 (VMOVv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
6635 def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
6636 (VMOVv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
6638 // 'gas' compatibility aliases for quad-word instructions. Strictly speaking,
6639 // these should restrict to just the Q register variants, but the register
6640 // classes are enough to match correctly regardless, so we keep it simple
6641 // and just use MnemonicAlias.
6642 def : NEONMnemonicAlias<"vbicq", "vbic">;
6643 def : NEONMnemonicAlias<"vandq", "vand">;
6644 def : NEONMnemonicAlias<"veorq", "veor">;
6645 def : NEONMnemonicAlias<"vorrq", "vorr">;
6647 def : NEONMnemonicAlias<"vmovq", "vmov">;
6648 def : NEONMnemonicAlias<"vmvnq", "vmvn">;
6649 // Explicit versions for floating point so that the FPImm variants get
6650 // handled early. The parser gets confused otherwise.
6651 def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">;
6652 def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">;
6654 def : NEONMnemonicAlias<"vaddq", "vadd">;
6655 def : NEONMnemonicAlias<"vsubq", "vsub">;
6657 def : NEONMnemonicAlias<"vminq", "vmin">;
6658 def : NEONMnemonicAlias<"vmaxq", "vmax">;
6660 def : NEONMnemonicAlias<"vmulq", "vmul">;
6662 def : NEONMnemonicAlias<"vabsq", "vabs">;
6664 def : NEONMnemonicAlias<"vshlq", "vshl">;
6665 def : NEONMnemonicAlias<"vshrq", "vshr">;
6667 def : NEONMnemonicAlias<"vcvtq", "vcvt">;
6669 def : NEONMnemonicAlias<"vcleq", "vcle">;
6670 def : NEONMnemonicAlias<"vceqq", "vceq">;
6672 def : NEONMnemonicAlias<"vzipq", "vzip">;
6673 def : NEONMnemonicAlias<"vswpq", "vswp">;
// Suffix-specific aliases for vrecpe.
6675 def : NEONMnemonicAlias<"vrecpeq.f32", "vrecpe.f32">;
6676 def : NEONMnemonicAlias<"vrecpeq.u32", "vrecpe.u32">;
6679 // Alias for loading floating point immediates that aren't representable
6680 // using the vmov.f32 encoding but the bitpattern is representable using
6681 // the .i32 encoding.
// The immediate goes through the i32 modified-immediate operand (nImmVMOVI32).
// Q-register version, then D-register version.
6682 def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
6683 (VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
6684 def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
6685 (VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;