//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//
#include "SPURegisterNames.h"
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "SPUFrameInfo.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"

#include <map>

using namespace llvm;

// Used in getTargetNodeName() below
namespace {
  std::map<unsigned, const char *> node_names;

  //! MVT mapping to useful data for Cell SPU
  struct valtype_map_s {
    const MVT   valtype;
    const int   prefslot_byte;
  };

  const valtype_map_s valtype_map[] = {
    { MVT::i1,   3 },
    { MVT::i8,   3 },
    { MVT::i16,  2 },
    { MVT::i32,  0 },
    { MVT::f32,  0 },
    { MVT::i64,  0 },
    { MVT::f64,  0 },
    { MVT::i128, 0 }
  };

  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
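
  // The "preferred slot" is the portion of a 128-bit SPU register where a
  // scalar lives: byte 3 for an i8, bytes 2-3 for an i16, bytes 0-3 for an
  // i32/f32. As an illustration, an i16 sitting at byte offset 6 of its
  // 16-byte block is brought into its preferred slot by rotating the block
  // left (6 - 2) = 4 bytes (see LowerLOAD below).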

  const valtype_map_s *getValueTypeMapEntry(MVT VT) {
    const valtype_map_s *retval = 0;

    for (size_t i = 0; i < n_valtype_map; ++i) {
      if (valtype_map[i].valtype == VT) {
        retval = valtype_map + i;
        break;
      }
    }

#ifndef NDEBUG
    if (retval == 0) {
      cerr << "getValueTypeMapEntry returns NULL for "
           << VT.getMVTString()
           << "\n";
      abort();
    }
#endif

    return retval;
  }
}

SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM),
    SPUTM(TM)
{
  // Fold away setcc operations if possible.
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set up the SPU's register classes:
  addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);

  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::Constant,   MVT::i64, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::LOAD,   VT, Custom);
    setOperationAction(ISD::STORE,  VT, Custom);
    setLoadExtAction(ISD::EXTLOAD,  VT, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, Custom);

    // SMUL_LOHI, UMUL_LOHI are not legal for Cell:
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
      MVT StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType) sctype;

    setOperationAction(ISD::LOAD,  VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
      MVT StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  // Custom lower BRCOND for i8 to "promote" the result to whatever the result
  // operand happens to be:
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);

  // Custom lower SELECT_CC for most cases, but expand by default
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i8,    Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i16,   Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32,   Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i64,   Custom);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);

  // SPU has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);

  // SPU has no hardware square root; expand FSQRT to a libcall
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.

  // FIXME: Change from "expand" to appropriate type once ROTR is supported in
  //        the ISel.
  setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i8,  Expand /*Custom*/);

  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8,  Custom);

  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);

  // Make these operations legal and handle them during instruction selection:
  setOperationAction(ISD::SHL, MVT::i64, Legal);
  setOperationAction(ISD::SRL, MVT::i64, Legal);
  setOperationAction(ISD::SRA, MVT::i64, Legal);

  // Custom lower i8 multiplications; i32 is legal, i64 becomes a libcall
  setOperationAction(ISD::MUL, MVT::i8,  Custom);
  setOperationAction(ISD::MUL, MVT::i32, Legal);
  setOperationAction(ISD::MUL, MVT::i64, Expand);  // libcall

  // Need to custom handle (some) common i8, i64 math ops
  setOperationAction(ISD::ADD, MVT::i8,  Custom);
  setOperationAction(ISD::ADD, MVT::i64, Custom);
  setOperationAction(ISD::SUB, MVT::i8,  Custom);
  setOperationAction(ISD::SUB, MVT::i64, Custom);

  // SPU does not have BSWAP, but it does have CTLZ support for i32.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8,  Custom);
  setOperationAction(ISD::CTPOP, MVT::i16, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i64, Custom);

  setOperationAction(ISD::CTTZ , MVT::i32, Expand);
  setOperationAction(ISD::CTTZ , MVT::i64, Expand);

  setOperationAction(ISD::CTLZ , MVT::i32, Legal);

  // SPU has a version of select that implements (a&~c)|(b&c), just like
  // select ought to work:
  setOperationAction(ISD::SELECT, MVT::i8,  Legal);
  setOperationAction(ISD::SELECT, MVT::i16, Legal);
  setOperationAction(ISD::SELECT, MVT::i32, Legal);
  setOperationAction(ISD::SELECT, MVT::i64, Legal);

  setOperationAction(ISD::SETCC, MVT::i8,  Legal);
  setOperationAction(ISD::SETCC, MVT::i16, Legal);
  setOperationAction(ISD::SETCC, MVT::i32, Legal);
  setOperationAction(ISD::SETCC, MVT::i64, Legal);

  // Zero extension and sign extension for i64 have to be
  // custom legalized
  setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
  setOperationAction(ISD::ANY_EXTEND,  MVT::i64, Custom);

  // Custom lower i128 -> i64 truncates
  setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);

  // SPU has a legal FP -> signed INT instruction
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  // f64 FDIV has no SPU instruction sequence; expand it to a libcall
  setOperationAction(ISD::FDIV, MVT::f64, Expand);  // libcall

  // SPU has [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC,     MVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::ConstantPool,  VT, Custom);
    setOperationAction(ISD::JumpTable,     VT, Custom);
  }

  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG ,             MVT::Other, Expand);
  setOperationAction(ISD::VACOPY ,            MVT::Other, Expand);
  setOperationAction(ISD::VAEND ,             MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE ,         MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE ,      MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
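  // (The legalizer builds the pair roughly as (or (shl (anyext hi), 32),
  // (zext lo)), using the i64 shift/or support above.)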

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT VT = (MVT::SimpleValueType)i;

    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD , VT, Legal);
    setOperationAction(ISD::SUB , VT, Legal);
    // mul is legal for the supported vector types.
    // TODO: v2i64 vector multiply
    setOperationAction(ISD::MUL , VT, Legal);

    setOperationAction(ISD::AND   , VT, Legal);
    setOperationAction(ISD::OR    , VT, Legal);
    setOperationAction(ISD::XOR   , VT, Legal);
    setOperationAction(ISD::LOAD  , VT, Legal);
    setOperationAction(ISD::SELECT, VT, Legal);
    setOperationAction(ISD::STORE,  VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);
    setOperationAction(ISD::ConstantPool,       VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,   VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);
  }

  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR,  MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);

  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setOperationAction(ISD::FDIV, MVT::v4f32, Legal);

  setShiftAmountType(MVT::i32);
  setBooleanContents(ZeroOrNegativeOneBooleanContent);
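  // A "true" boolean is thus all ones, which is exactly the mask operand that
  // SELB-style selects, (a & ~c) | (b & c), consume (see SELECT above).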

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  computeRegisterProperties();

  // Set pre-RA register scheduler default to BURR, which produces slightly
  // better code than the default (could also be TDRR, but TargetLowering.h
  // needs a mod to support that model):
  setSchedulingPreference(SchedulingForRegPressure);
}

const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
    node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
    node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
    node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
    node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
    node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
}

//===----------------------------------------------------------------------===//
// Return the Cell SPU's SETCC result type
//===----------------------------------------------------------------------===//

MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const {
  MVT VT = Op.getValueType();
  // i8, i16 and i32 are valid SETCC result types
  return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ? VT : MVT::i32);
}
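
// For example, a setcc on i64 operands reports its result type as MVT::i32,
// while i8, i16 and i32 comparisons keep their own width.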

//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
// LowerOperation implementation
//===----------------------------------------------------------------------===//

/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to rotate to extract the requested element.

 For extending loads, we also want to ensure that the following sequence is
 emitted, e.g. for MVT::f32 extending load to MVT::f64:

\verbatim
%1  v16i8,ch = load
%2  v16i8,ch = rotate %1
%3  v4f32,ch = bitconvert %2
%4  f32     = vec2prefslot %3
%5  f64     = fp_extend %4
\endverbatim
*/
static SDValue
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDValue the_chain = LN->getChain();
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  MVT InVT = LN->getMemoryVT();
  MVT OutVT = Op.getValueType();
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  const valtype_map_s *vtm = getValueTypeMapEntry(InVT);

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDValue result;
    SDValue basePtr = LN->getBasePtr();
    SDValue rotate;

    if (alignment == 16) {
      ConstantSDNode *CN;

      // Special cases for a known aligned load to simplify the base pointer
      // and the rotation amount:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();
        int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);

        if (rotamt < 0)
          rotamt += 16;

        rotate = DAG.getConstant(rotamt, MVT::i16);

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        if ((offset & ~0xf) > 0) {
          basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
                                basePtr,
                                DAG.getConstant((offset & ~0xf), PtrVT));
        }
      } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
                 || (basePtr.getOpcode() == SPUISD::IndirectAddr
                     && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
                     && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
        // Plain aligned a-form address: rotate into preferred slot
        // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
        int64_t rotamt = -vtm->prefslot_byte;
        if (rotamt < 0)
          rotamt += 16;
        rotate = DAG.getConstant(rotamt, MVT::i16);
      } else {
        // Offset the rotate amount by the basePtr and the preferred slot
        // byte offset
        int64_t rotamt = -vtm->prefslot_byte;
        if (rotamt < 0)
          rotamt += 16;
        rotate = DAG.getNode(ISD::ADD, PtrVT,
                             basePtr,
                             DAG.getConstant(rotamt, PtrVT));
      }
    } else {
      // Unaligned load: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        SDValue Flag;

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
          the_chain = DAG.getCopyToReg(the_chain, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, VReg, PtrVT);
        } else {
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
        }
      } else {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }

      // Offset the rotate amount by the basePtr and the preferred slot
      // byte offset
      rotate = DAG.getNode(ISD::ADD, PtrVT,
                           basePtr,
                           DAG.getConstant(-vtm->prefslot_byte, PtrVT));
    }

    // Re-emit as a v16i8 vector load
    result = DAG.getLoad(MVT::v16i8, the_chain, basePtr,
                         LN->getSrcValue(), LN->getSrcValueOffset(),
                         LN->isVolatile(), 16);

    // Update the chain
    the_chain = result.getValue(1);

    // Rotate into the preferred slot:
    result = DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v16i8,
                         result.getValue(0), rotate);

    // Convert the loaded v16i8 vector to the appropriate vector type
    // specified by the operand:
    MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
    result = DAG.getNode(SPUISD::VEC2PREFSLOT, InVT,
                         DAG.getNode(ISD::BIT_CONVERT, vecVT, result));

    // Handle extending loads by extending the scalar result:
    if (ExtType == ISD::SEXTLOAD) {
      result = DAG.getNode(ISD::SIGN_EXTEND, OutVT, result);
    } else if (ExtType == ISD::ZEXTLOAD) {
      result = DAG.getNode(ISD::ZERO_EXTEND, OutVT, result);
    } else if (ExtType == ISD::EXTLOAD) {
      unsigned NewOpc = ISD::ANY_EXTEND;

      if (OutVT.isFloatingPoint())
        NewOpc = ISD::FP_EXTEND;

      result = DAG.getNode(NewOpc, OutVT, result);
    }

    SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
    SDValue retops[2] = {
      result,
      the_chain
    };

    result = DAG.getNode(SPUISD::LDRESULT, retvts,
                         retops, sizeof(retops) / sizeof(retops[0]));
    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) LN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}

/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
 */
static SDValue
LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
629 StoreSDNode *SN = cast<StoreSDNode>(Op);
630 SDValue Value = SN->getValue();
631 MVT VT = Value.getValueType();
632 MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
633 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
634 unsigned alignment = SN->getAlignment();

  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    // The vector type we really want to load from the 16-byte chunk.
    MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
        stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));

    SDValue alignLoadVec;
    SDValue basePtr = SN->getBasePtr();
    SDValue the_chain = SN->getChain();
    SDValue insertEltOffs;

    if (alignment == 16) {
      ConstantSDNode *CN;

      // Special cases for a known aligned load to simplify the base pointer
      // and insertion byte:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
                                    basePtr,
                                    DAG.getConstant((offset & 0xf), PtrVT));

        if ((offset & ~0xf) > 0) {
          basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
                                basePtr,
                                DAG.getConstant((offset & ~0xf), PtrVT));
        }
      } else {
        // Otherwise, assume it's at byte 0 of basePtr
        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
                                    basePtr,
                                    DAG.getConstant(0, PtrVT));
      }
    } else {
      // Unaligned load: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        SDValue Flag;

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
          the_chain = DAG.getCopyToReg(the_chain, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, VReg, PtrVT);
        } else {
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
        }
      } else {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }

      // Insertion point is solely determined by basePtr's contents
      insertEltOffs = DAG.getNode(ISD::ADD, PtrVT,
                                  basePtr,
                                  DAG.getConstant(0, PtrVT));
    }

    // Re-emit as a v16i8 vector load
    alignLoadVec = DAG.getLoad(MVT::v16i8, the_chain, basePtr,
                               SN->getSrcValue(), SN->getSrcValueOffset(),
                               SN->isVolatile(), 16);

    // Update the chain
    the_chain = alignLoadVec.getValue(1);

    LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
    SDValue theValue = SN->getValue();
    SDValue result;

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }

    // If the base pointer is already a D-form address, then just create
    // a new D-form address with a slot offset and the original base pointer.
    // Otherwise generate a D-form address with the slot offset relative
    // to the stack pointer, which is always aligned.
#if !defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      cerr << "CellSPU LowerSTORE: basePtr = ";
      basePtr.getNode()->dump(&DAG);
      cerr << "\n";
    }
#endif

    SDValue insertEltOp =
      DAG.getNode(SPUISD::SHUFFLE_MASK, vecVT, insertEltOffs);
    SDValue vectorizeOp =
      DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue);

    result = DAG.getNode(SPUISD::SHUFB, vecVT,
                         vectorizeOp, alignLoadVec,
                         DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, insertEltOp));

    result = DAG.getStore(the_chain, result, basePtr,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->getAlignment());

#if 0 && !defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      const SDValue &currentRoot = DAG.getRoot();

      DAG.setRoot(result);
      cerr << "------- CellSPU:LowerStore result:\n";
      DAG.dump();
      cerr << "-------\n";
      DAG.setRoot(currentRoot);
    }
#endif

    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) SN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}

/// Generate the address of a constant pool entry.
static SDValue
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDValue with the constant pool address in it.
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
    }
  }

  assert(0 &&
         "LowerConstantPool: Relocation model other than static"
         " not supported.");
  return SDValue();
}

static SDValue
LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
    }
  }

  assert(0 &&
         "LowerJumpTable: Relocation model other than static not supported.");
  return SDValue();
}

static SDValue
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  const TargetMachine &TM = DAG.getTarget();
  SDValue Zero = DAG.getConstant(0, PtrVT);

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
    }
  } else {
    cerr << "LowerGlobalAddress: Relocation model other than static not "
         << "supported.\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}

//! Custom lower i64 integer constants
/*!
 This code inserts all of the necessary juggling that needs to occur to load
 a 64-bit constant into a register.
 */
static SDValue
LowerConstant(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();

  if (VT == MVT::i64) {
    ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
    SDValue T = DAG.getConstant(CN->getZExtValue(), VT);
    return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
  } else {
    cerr << "LowerConstant: unhandled constant type "
         << VT.getMVTString()
         << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}

//! Custom lower double precision floating point constants
static SDValue
LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();

  if (VT == MVT::f64) {
    ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());

    assert((FP != 0) &&
           "LowerConstantFP: Node is not ConstantFPSDNode");

    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
    SDValue T = DAG.getConstant(dbits, MVT::i64);
    SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T);
    return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
                       DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, Tvec));
  }

  return SDValue();
}

static SDValue
LowerBRCOND(SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) {
  SDValue Cond = Op.getOperand(1);
  MVT CondVT = Cond.getValueType();
  unsigned CondOpc;

  if (CondVT == MVT::i8) {
    SDValue CondOp0 = Cond.getOperand(0);
    if (Cond.getOpcode() == ISD::TRUNCATE) {
      // Use the truncate's value type and ANY_EXTEND the condition (DAGcombine
      // will then remove the truncate)
      CondVT = CondOp0.getValueType();
      CondOpc = ISD::ANY_EXTEND;
    } else {
      CondVT = MVT::i32; // default to something reasonable
      CondOpc = ISD::ZERO_EXTEND;
    }

    Cond = DAG.getNode(CondOpc, CondVT, Op.getOperand(1));

    return DAG.getNode(ISD::BRCOND, Op.getValueType(),
                       Op.getOperand(0), Cond, Op.getOperand(2));
  }

  return SDValue(); // Unchanged
}

static SDValue
LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
{
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  SmallVector<SDValue, 48> ArgValues;
  SDValue Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;

  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Add DAG nodes to load the arguments or copy them out of registers.
  for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
       ArgNo != e; ++ArgNo) {
    MVT ObjectVT = Op.getValue(ArgNo).getValueType();
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    SDValue ArgVal;

    if (ArgRegIdx < NumArgRegs) {
      const TargetRegisterClass *ArgRegClass;

      switch (ObjectVT.getSimpleVT()) {
      default: {
        cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
             << ObjectVT.getMVTString()
             << "\n";
        abort();
      }
      case MVT::i8:
        ArgRegClass = &SPU::R8CRegClass;
        break;
      case MVT::i16:
        ArgRegClass = &SPU::R16CRegClass;
        break;
      case MVT::i32:
        ArgRegClass = &SPU::R32CRegClass;
        break;
      case MVT::i64:
        ArgRegClass = &SPU::R64CRegClass;
        break;
      case MVT::f32:
        ArgRegClass = &SPU::R32FPRegClass;
        break;
      case MVT::f64:
        ArgRegClass = &SPU::R64FPRegClass;
        break;
      case MVT::v2f64:
      case MVT::v4f32:
      case MVT::v2i64:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        ArgRegClass = &SPU::VECREGRegClass;
        break;
      }

      unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
      RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
      ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
      ++ArgRegIdx;
    } else {
      // We need to load the argument to a virtual register if we determined
      // above that we ran out of physical registers of the appropriate type
      // or we're forced to do vararg
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
      ArgOffset += StackSlotSize;
    }

    ArgValues.push_back(ArgVal);
    // Update the chain
    Root = ArgVal.getOperand(0);
  }

  // vararg handling:
  if (isVarArg) {
    // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
    // We will spill (79-3)+1 registers to the stack
    SmallVector<SDValue, 79-3+1> MemOps;

    // Create the frame slot
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
      SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
      SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
      SDValue Store = DAG.getStore(Root, ArgVal, FIN, NULL, 0);
      Root = Store.getOperand(0);
      MemOps.push_back(Store);

      // Increment address by stack slot size for the next stored argument
      ArgOffset += StackSlotSize;
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
                         &MemOps[0], MemOps.size());
  }

  ArgValues.push_back(Root);

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, Op.getNode()->getVTList(),
                     &ArgValues[0], ArgValues.size());
}

/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;  // Top 14 bits have to be sext of immediate.

  return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
}
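
// For example, a constant callee address of 0x1000 is word aligned and fits
// in the signed 18-bit range, so isLSAAddress returns the word immediate
// 0x400 (0x1000 >> 2); unaligned addresses, or 0x20000 and above, are
// rejected.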

static SDValue
LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
  SDValue Chain = TheCall->getChain();
  SDValue Callee = TheCall->getCallee();
  unsigned NumOps = TheCall->getNumArgs();
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  // Handy pointer type
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area. According to the SPU ABI,
  // we minimally need space for [LR] and [SP].
  unsigned NumStackBytes = SPUFrameInfo::minStackSize();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDValue, 8> MemOpChains;

  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = TheCall->getArg(i);

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType().getSimpleVT()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::v2i64:
    case MVT::v2f64:
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }

  // Update number of stack bytes actually used, insert a call sequence start
  NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
                                                            true));

  if (!MemOpChains.empty()) {
    // Emit all of the pending stores for the stack arguments.
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    GlobalValue *GV = G->getGlobal();
    MVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);

    if (!ST->usingLargeMem()) {
      // Turn calls to targets that are defined (i.e., have bodies) into BRSL
      // style calls, otherwise, external symbols are BRASL calls. This assumes
      // that declared/defined symbols are in the same compilation unit and can
      // be reached through PC-relative jumps.
      //
      // NOTE:
      // This may be an unsafe assumption for JIT and really large compilation
      // units.
      if (GV->isDeclaration()) {
        Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
      } else {
        Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
      }
    } else {
      // "Large memory" mode: Turn all calls into indirect calls with a X-form
      // address.
      Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    MVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
                                                 Callee.getValueType());

    if (!ST->usingLargeMem()) {
      Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, ExtSym, Zero);
    } else {
      Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, ExtSym, Zero);
    }
  } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDValue(Dest, 0);
  }

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);
  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag),
                      &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  if (TheCall->getValueType(0) != MVT::Other)
    InFlag = Chain.getValue(1);

  SDValue ResultVals[3];
  unsigned NumResults = 0;

  // If the call has results, copy the values out of the ret val registers.
  switch (TheCall->getValueType(0).getSimpleVT()) {
  default: assert(0 && "Unexpected ret value!");
  case MVT::Other: break;
  case MVT::i32:
    if (TheCall->getValueType(1) == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals[1] = Chain.getValue(0);
      NumResults = 2;
    } else {
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      NumResults = 1;
    }
    break;
  case MVT::i64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  case MVT::f32:
  case MVT::f64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  case MVT::v2f64:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  }

  // If the function returns void, just return the chain.
  if (NumResults == 0)
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  ResultVals[NumResults++] = Chain;
  SDValue Res = DAG.getMergeValues(ResultVals, NumResults);
  return Res.getValue(Op.getResNo());
}

static SDValue
LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, TM, RVLocs);
  CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }

  SDValue Chain = Op.getOperand(0);
  SDValue Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
    Flag = Chain.getValue(1);
  }

  if (Flag.getNode())
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
}

//===----------------------------------------------------------------------===//
// Vector related lowering:
//===----------------------------------------------------------------------===//

static ConstantSDNode *
getVecImm(SDNode *N) {
  SDValue OpVal(0, 0);

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.getNode() == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return 0;
  }

  if (OpVal.getNode() != 0) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      return CN;
    }
  }

  return 0; // All UNDEF: use implicit def.; not Constant node
}

/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant.
SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (Value <= 0x3ffff)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}

/// get_vec_i16imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant.
SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
      return DAG.getTargetConstant(Value, ValueType);
    }
  }

  return SDValue();
}

/// get_vec_i10imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 10-bit constant, and if so, return the
/// constant.
SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (isS10Constant(Value))
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}

/// get_vec_i8imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 8-bit constant, and if so, return the
/// constant.
///
/// @note: The incoming vector is v16i8 because that's the only way we can load
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same value.
SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
                           MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getZExtValue();
    if (ValueType == MVT::i16
        && Value <= 0xffff                /* truncated from uint64_t */
        && ((short) Value >> 8) == ((short) Value & 0xff))
      return DAG.getTargetConstant(Value & 0xff, ValueType);
    else if (ValueType == MVT::i8
             && (Value & 0xff) == Value)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}

/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant.
SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
                             MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if ((ValueType == MVT::i32
         && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
      return DAG.getTargetConstant(Value >> 16, ValueType);
  }

  return SDValue();
}

/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
  }

  return SDValue();
}

/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getTargetConstant(CN->getZExtValue(), MVT::i64);
  }

  return SDValue();
}

// If this is a vector of constants or undefs, get the bits. A bit in
// UndefBits is set if the corresponding element of the vector is an
// ISD::UNDEF value. For undefs, the corresponding VectorBits values are
// zero. Return true if this is not an array of constants, false if it is.
//
static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
                                       uint64_t UndefBits[2]) {
  // Start with zero'd results.
  VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;

  unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
    SDValue OpVal = BV->getOperand(i);

    unsigned PartNo = i >= e/2;               // In the upper half of the 128 bits?
    unsigned SlotNo = e/2 - (i & (e/2-1))-1;  // Which subpiece of the uint64_t.

    uint64_t EltBits = 0;
    if (OpVal.getOpcode() == ISD::UNDEF) {
      uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
      UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
      continue;
    } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
    } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
      const APFloat &apf = CN->getValueAPF();
      EltBits = (CN->getValueType(0) == MVT::f32
                 ? FloatToBits(apf.convertToFloat())
                 : DoubleToBits(apf.convertToDouble()));
    } else {
      // Nonconstant element.
      return true;
    }

    VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
  }

  //printf("%llx %llx %llx %llx\n",
  //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
  return false;
}
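
// For example, a v4i32 build_vector of <1, 2, 3, 4> packs as
// VectorBits[0] = 0x0000000100000002 and VectorBits[1] = 0x0000000300000004,
// with both UndefBits words zero.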

/// If this is a splat (repetition) of a value across the whole vector, return
/// the smallest size that splats it. For example, "0x01010101010101..." is a
/// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
/// SplatSize = 1 byte.
static bool isConstantSplat(const uint64_t Bits128[2],
                            const uint64_t Undef128[2],
                            unsigned MinSplatBits,
                            uint64_t &SplatBits, uint64_t &SplatUndef,
                            int &SplatSize) {
  // Don't let undefs prevent splats from matching. See if the top 64-bits are
  // the same as the lower 64-bits, ignoring undefs.
  uint64_t Bits64  = Bits128[0] | Bits128[1];
  uint64_t Undef64 = Undef128[0] & Undef128[1];
  uint32_t Bits32  = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
  uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
  uint16_t Bits16  = uint16_t(Bits32)  | uint16_t(Bits32 >> 16);
  uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);

  if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
    if (MinSplatBits < 64) {
      // Check that the top 32-bits are the same as the lower 32-bits, ignoring
      // undefs.
      if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
        if (MinSplatBits < 32) {
          // If the top 16-bits are different than the lower 16-bits, ignoring
          // undefs, we have an i32 splat.
          if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
            if (MinSplatBits < 16) {
              // If the top 8-bits are different than the lower 8-bits, ignoring
              // undefs, we have an i16 splat.
              if ((Bits16 & (uint16_t(~Undef16) >> 8))
                  == ((Bits16 >> 8) & ~Undef16)) {
                // Otherwise, we have an 8-bit splat.
                SplatBits  = uint8_t(Bits16)  | uint8_t(Bits16 >> 8);
                SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
                SplatSize = 1;
                return true;
              }
            } else {
              SplatBits = Bits16;
              SplatUndef = Undef16;
              SplatSize = 2;
              return true;
            }
          }
        } else {
          SplatBits = Bits32;
          SplatUndef = Undef32;
          SplatSize = 4;
          return true;
        }
      }
    } else {
      SplatBits = Bits128[0];
      SplatUndef = Undef128[0];
      SplatSize = 8;
      return true;
    }
  }

  return false; // Can't be a splat if two pieces don't match.
}

// If this is a case we can't handle, return null and let the default
// expansion code take care of it. If we CAN select this case, and if it
// selects to a single instruction, return Op. Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  // If this is a vector of constants or undefs, get the bits. A bit in
  // UndefBits is set if the corresponding element of the vector is an
  // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
  // zero.
  uint64_t VectorBits[2];
  uint64_t UndefBits[2];
  uint64_t SplatBits, SplatUndef;
  int SplatSize;

  if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
      || !isConstantSplat(VectorBits, UndefBits,
                          VT.getVectorElementType().getSizeInBits(),
                          SplatBits, SplatUndef, SplatSize))
    return SDValue(); // Not a constant vector, not a splat.

  switch (VT.getSimpleVT()) {
  default: assert(0 && "LowerBUILD_VECTOR: Unexpected vector type!");
  case MVT::v4f32: {
    uint32_t Value32 = SplatBits;
    assert(SplatSize == 4
           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(Value32, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
  }
  case MVT::v2f64: {
    uint64_t f64val = SplatBits;
    assert(SplatSize == 8
           && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(f64val, MVT::i64);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
  }
  case MVT::v16i8: {
    // 8-bit constants have to be expanded to 16-bits
    unsigned short Value16 = SplatBits | (SplatBits << 8);
    SDValue Ops[8];
    for (int i = 0; i < 8; ++i)
      Ops[i] = DAG.getConstant(Value16, MVT::i16);
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
  }
  case MVT::v8i16: {
    unsigned short Value16;
    if (SplatSize == 2)
      Value16 = (unsigned short) (SplatBits & 0xffff);
    else
      Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
    SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
    SDValue Ops[8];
    for (int i = 0; i < 8; ++i) Ops[i] = T;
    return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
  }
  case MVT::v4i32: {
    unsigned int Value = SplatBits;
    SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
    return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
  }
  case MVT::v2i64: {
    uint64_t val = SplatBits;
    uint32_t upper = uint32_t(val >> 32);
    uint32_t lower = uint32_t(val);

    if (upper == lower) {
      // Magic constant that can be matched by IL, ILA, et. al.
      SDValue Val = DAG.getTargetConstant(val, MVT::i64);
      return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
    } else {
      SDValue LO32;
      SDValue HI32;
      SmallVector<SDValue, 16> ShufBytes;
      bool upper_special, lower_special;

      // NOTE: This code creates common-case shuffle masks that can be easily
      // detected as common expressions. It is not attempting to create highly
      // specialized masks to replace any and all 0's, 0xff's and 0x80's.

      // Detect if the upper or lower half is a special shuffle mask pattern:
      upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
      lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);

      // Create lower vector if not a special pattern
      if (!lower_special) {
        SDValue LO32C = DAG.getConstant(lower, MVT::i32);
        LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
                           DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                       LO32C, LO32C, LO32C, LO32C));
      }

      // Create upper vector if not a special pattern
      if (!upper_special) {
        SDValue HI32C = DAG.getConstant(upper, MVT::i32);
        HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
                           DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                       HI32C, HI32C, HI32C, HI32C));
      }

      // If either upper or lower are special, then the two input operands are
      // the same (basically, one of them is a "don't care")
      if (lower_special)
        LO32 = HI32;
      if (upper_special)
        HI32 = LO32;
      if (lower_special && upper_special) {
        // Unhappy situation... both upper and lower are special, so punt with
        // a target constant:
        SDValue Zero = DAG.getConstant(0, MVT::i32);
        HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
                                  Zero, Zero);
      }
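
      // Build the shufb control words a byte at a time. shufb treats control
      // bytes specially: 0b10xxxxxx yields 0x00, 0b110xxxxx yields 0xFF, and
      // 0b111xxxxx yields 0x80 -- hence the 0x80/0xc0/0xe0 constants below.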
      for (int i = 0; i < 4; ++i) {
        uint64_t val = 0;
        for (int j = 0; j < 4; ++j) {
          bool process_upper, process_lower;
          val <<= 8;
          process_upper = (upper_special && (i & 1) == 0);
          process_lower = (lower_special && (i & 1) == 1);

          if (process_upper || process_lower) {
            if ((process_upper && upper == 0)
                || (process_lower && lower == 0))
              val |= 0x80;
            else if ((process_upper && upper == 0xffffffff)
                     || (process_lower && lower == 0xffffffff))
              val |= 0xc0;
            else if ((process_upper && upper == 0x80000000)
                     || (process_lower && lower == 0x80000000))
              val |= (j == 0 ? 0xe0 : 0x80);
          } else
            val |= i * 4 + j + ((i & 1) * 16);
        }

        ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
      }

      return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
                         DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                     &ShufBytes[0], ShufBytes.size()));
    }
  }
  }

  return SDValue();
}
1733 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1734 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1735 /// permutation vector, V3, is monotonically increasing with one "exception"
1736 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1737 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1738 /// In either case, the net result is going to eventually invoke SHUFB to
1739 /// permute/shuffle the bytes from V1 and V2.
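/// For example, for v4i32 the permutation vector (0, 1, 6, 3) is monotonically
/// increasing with one exception: element 6 (V2's element 2) lands in slot 2,
/// which a single SHUFFLE_MASK/SHUFB pair can realize.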
1741 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, which
1742 /// generate the control word for byte/halfword/word insertion. This takes
1743 /// care of a single element move from V2 into V1.
1745 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
1746 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1747 SDValue V1 = Op.getOperand(0);
1748 SDValue V2 = Op.getOperand(1);
1749 SDValue PermMask = Op.getOperand(2);
1751 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1753 // If we have a single element being moved from V1 to V2, this can be handled
1754 // using the C*[DX] compute mask instructions, but the vector elements have
1755 // to be monotonically increasing with one exception element.
1756 MVT VecVT = V1.getValueType();
1757 MVT EltVT = VecVT.getVectorElementType();
1758 unsigned EltsFromV2 = 0;
unsigned V2Elt = 0;
1760 unsigned V2EltIdx0 = 0;
1761 unsigned CurrElt = 0;
1762 unsigned MaxElts = VecVT.getVectorNumElements();
1763 unsigned PrevElt = 0;
int V0Elt = 0;
1765 bool monotonic = true;
bool rotate = true;
1768 if (EltVT == MVT::i8) {
V2EltIdx0 = 16;
1770 } else if (EltVT == MVT::i16) {
V2EltIdx0 = 8;
1772 } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
V2EltIdx0 = 4;
1774 } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
V2EltIdx0 = 2;
} else
1777 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1779 for (unsigned i = 0; i != PermMask.getNumOperands(); ++i) {
1780 if (PermMask.getOperand(i).getOpcode() != ISD::UNDEF) {
1781 unsigned SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
if (monotonic) {
1784 if (SrcElt >= V2EltIdx0) {
1785 if (1 >= (++EltsFromV2)) {
1786 V2Elt = (V2EltIdx0 - SrcElt) << 2;
}
1788 } else if (CurrElt != SrcElt) {
monotonic = false;
}
++CurrElt;
}

if (rotate) {
1796 if (PrevElt > 0 && SrcElt < MaxElts) {
1797 if ((PrevElt == SrcElt - 1)
1798     || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
PrevElt = SrcElt;
if (SrcElt == 0)
V0Elt = i;
} else {
rotate = false;
}
1805 } else if (PrevElt == 0) {
1806 // First time through, need to keep track of the previous element
PrevElt = SrcElt;
} else {
1809 // This isn't a rotation; it takes elements from vector 2
rotate = false;
}
1816 if (EltsFromV2 == 1 && monotonic) {
1817 // Compute mask and shuffle
1818 MachineFunction &MF = DAG.getMachineFunction();
1819 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1820 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1821 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1822 // Initialize temporary register to 0
1823 SDValue InitTempReg =
1824 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1825 // Copy register's contents as index in SHUFFLE_MASK:
1826 SDValue ShufMaskOp =
1827 DAG.getNode(SPUISD::SHUFFLE_MASK, MVT::v4i32,
1828 DAG.getTargetConstant(V2Elt, MVT::i32),
1829 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1830 // Use shuffle mask in SHUFB synthetic instruction:
1831 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1832 } else if (rotate) {
1833 int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
1835 return DAG.getNode(SPUISD::ROTBYTES_LEFT, V1.getValueType(),
1836 V1, DAG.getConstant(rotamt, MVT::i16));
1838 // Convert the SHUFFLE_VECTOR mask's input element units to the
// actual byte units needed by SHUFB.
1840 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
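// (e.g., for v4i32 BytesPerElement is 4, so mask element 5 expands to
// byte indices 20, 21, 22 and 23)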
1842 SmallVector<SDValue, 16> ResultMask;
1843 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
unsigned SrcElt;
1845 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
SrcElt = 0;
else
1848 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1850 for (unsigned j = 0; j < BytesPerElement; ++j) {
1851 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
                                          MVT::i8));
}
}
1856 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1857 &ResultMask[0], ResultMask.size());
1858 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
1862 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1863 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1865 if (Op0.getNode()->getOpcode() == ISD::Constant) {
1866 // For a constant, build the appropriate constant vector, which will
1867 // eventually simplify to a vector register load.
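// (A full splat is built rather than defining only element 0: it still
// satisfies SCALAR_TO_VECTOR's undef upper slots and lets the whole vector
// fold to a single immediate constant load.)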
1869 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1870 SmallVector<SDValue, 16> ConstVecValues;
MVT VT;
size_t n_copies;
1874 // Create a constant vector:
1875 switch (Op.getValueType().getSimpleVT()) {
1876 default: assert(0 && "Unexpected constant value type in "
1877 "LowerSCALAR_TO_VECTOR");
1878 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1879 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1880 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1881 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1882 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1883 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1886 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1887 for (size_t j = 0; j < n_copies; ++j)
1888 ConstVecValues.push_back(CValue);
1890 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1891 &ConstVecValues[0], ConstVecValues.size());
1893 // Otherwise, copy the value from one register to another:
1894 switch (Op0.getValueType().getSimpleVT()) {
1895 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
case MVT::i8:
case MVT::i16:
case MVT::i32:
case MVT::i64:
case MVT::f32:
case MVT::f64:
1902 return DAG.getNode(SPUISD::PREFSLOT2VEC, Op.getValueType(), Op0, Op0);
}
1909 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
1910 MVT VT = Op.getValueType();
1911 SDValue N = Op.getOperand(0);
1912 SDValue Elt = Op.getOperand(1);
SDValue retval;
1915 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
1916 // Constant argument:
1917 int EltNo = (int) C->getZExtValue();
1920 if (VT == MVT::i8 && EltNo >= 16)
1921 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
1922 else if (VT == MVT::i16 && EltNo >= 8)
1923 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
1924 else if (VT == MVT::i32 && EltNo >= 4)
1925 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
1926 else if (VT == MVT::i64 && EltNo >= 2)
1927 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");
1929 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
1930 // i32 and i64: Element 0 is the preferred slot
1931 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, N);
1934 // Need to generate shuffle mask and extract:
1935 int prefslot_begin = -1, prefslot_end = -1;
1936 int elt_byte = EltNo * VT.getSizeInBits() / 8;
1938 switch (VT.getSimpleVT()) {
default:
1940 assert(false && "Invalid value type!");
case MVT::i8:
1942 prefslot_begin = prefslot_end = 3;
break;
case MVT::i16:
1946 prefslot_begin = 2; prefslot_end = 3;
break;
case MVT::i32:
case MVT::f32:
1951 prefslot_begin = 0; prefslot_end = 3;
break;
case MVT::i64:
case MVT::f64:
1956 prefslot_begin = 0; prefslot_end = 7;
break;
}
1961 assert(prefslot_begin != -1 && prefslot_end != -1 &&
1962 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
1964 unsigned int ShufBytes[16];
1965 for (int i = 0; i < 16; ++i) {
1966 // zero fill upper part of preferred slot, don't care about the
// rest (undefined bytes)
1968 unsigned int mask_val;
1969 if (i <= prefslot_end) {
mask_val =
1971     ((i < prefslot_begin)
      ? 0x80
1973   : elt_byte + (i - prefslot_begin));
1975 ShufBytes[i] = mask_val;
} else
1977 ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
1980 SDValue ShufMask[4];
1981 for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
1982 unsigned bidx = i * 4;
1983 unsigned int bits = ((ShufBytes[bidx] << 24) |
1984                      (ShufBytes[bidx+1] << 16) |
1985                      (ShufBytes[bidx+2] << 8) |
                         ShufBytes[bidx+3]);
1987 ShufMask[i] = DAG.getConstant(bits, MVT::i32);
}

1990 SDValue ShufMaskVec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                      &ShufMask[0],
1992                                   sizeof(ShufMask) / sizeof(ShufMask[0]));
1994 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
1995 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
1996 N, N, ShufMaskVec));
} else {
1998 // Variable index: Rotate the requested element into slot 0, then replicate
1999 // slot 0 across the vector
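// (e.g., for v4i32 and element 2, the index is scaled to 2 << 2 == 8 below,
// and shifting the quadword left by 8 bytes moves element 2 into bytes
// 0..3, the preferred slot)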
2000 MVT VecVT = N.getValueType();
2001 if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
2002 cerr << "LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit vector type!\n";
2006 // Make life easier by making sure the index is zero-extended to i32
2007 if (Elt.getValueType() != MVT::i32)
2008 Elt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Elt);
2010 // Scale the index to a bit/byte shift quantity
APInt scaleFactor =
2012 APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
2013 unsigned scaleShift = scaleFactor.logBase2();
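// (e.g., for v8i16 there are 16/8 = 2 bytes per element, so scaleShift is 1
// and the byte offset becomes Elt << 1)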
SDValue vecShift;
2016 if (scaleShift > 0) {
2017 // Scale the shift factor:
2018 Elt = DAG.getNode(ISD::SHL, MVT::i32, Elt,
2019 DAG.getConstant(scaleShift, MVT::i32));
}

2022 vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT, N, Elt);
2024 // Replicate the bytes starting at byte 0 across the entire vector (for
2025 // consistency with the notion of a unified register set)
SDValue replicate;

2028 switch (VT.getSimpleVT()) {
default:
2030 cerr << "LowerEXTRACT_VECTOR_ELT(variable): Unhandled vector type\n";
abort();
case MVT::i8: {
2034 SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
2035 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
                             factor, factor);
break;
}
case MVT::i16: {
2040 SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2041 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
                             factor, factor);
break;
}
case MVT::i32:
case MVT::f32: {
2047 SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2048 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
                             factor, factor);
break;
}
case MVT::i64:
case MVT::f64: {
2054 SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2055 SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2056 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, loFactor, hiFactor,
2057                         loFactor, hiFactor);
break;
}
}
2062 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2063 DAG.getNode(SPUISD::SHUFB, VecVT,
2064                      vecShift, vecShift, replicate));
}

return retval;
}
2070 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2071 SDValue VecOp = Op.getOperand(0);
2072 SDValue ValOp = Op.getOperand(1);
2073 SDValue IdxOp = Op.getOperand(2);
2074 MVT VT = Op.getValueType();
2076 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(IdxOp);
2077 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2079 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2080 // Use $sp ($1) because it's always 16-byte aligned and it's available:
2081 SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
2082 DAG.getRegister(SPU::R1, PtrVT),
2083 DAG.getConstant(CN->getSExtValue(), PtrVT));
2084 SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, VT, Pointer);
SDValue result =
2087 DAG.getNode(SPUISD::SHUFB, VT,
2088             DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
                VecOp,
2090             DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, ShufMask));

return result;
}
2095 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
2096                            const TargetLowering &TLI)
{
2098 SDValue N0 = Op.getOperand(0); // Everything has at least one operand
2099 MVT ShiftVT = TLI.getShiftAmountTy();
2101 assert(Op.getValueType() == MVT::i8);
2104 assert(0 && "Unhandled i8 math operator");
2108 // 8-bit addition: Promote the arguments up to 16-bits and truncate
2110 SDValue N1 = Op.getOperand(1);
2111 N0 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0);
2112 N1 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1);
2113 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2114 DAG.getNode(Opc, MVT::i16, N0, N1));
}
case ISD::SUB: {
2119 // 8-bit subtraction: Promote the arguments up to 16 bits and truncate
// the result back down to 8 bits.
2121 SDValue N1 = Op.getOperand(1);
2122 N0 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0);
2123 N1 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1);
2124 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2125 DAG.getNode(Opc, MVT::i16, N0, N1));
}
case ISD::ROTR:
case ISD::ROTL: {
2129 SDValue N1 = Op.getOperand(1);
unsigned N1Opc;
2131 N0 = (N0.getOpcode() != ISD::Constant
2132       ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2133       : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
                            MVT::i16));
2135 N1Opc = N1.getValueType().bitsLT(ShiftVT)
        ? ISD::ZERO_EXTEND
        : ISD::TRUNCATE;
2138 N1 = (N1.getOpcode() != ISD::Constant
2139       ? DAG.getNode(N1Opc, ShiftVT, N1)
2140       : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2141                         TLI.getShiftAmountTy()));
SDValue ExpandArg =
2143 DAG.getNode(ISD::OR, MVT::i16, N0,
2144             DAG.getNode(ISD::SHL, MVT::i16,
2145                         N0, DAG.getConstant(8, MVT::i32)));
2146 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2147                    DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
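// (Duplicating the byte into both halves of ExpandArg makes a 16-bit rotate
// by k < 8 leave rotl8/rotr8 of the original byte in the low byte, which the
// TRUNCATE then extracts.)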
}
case ISD::SRL:
case ISD::SHL: {
2151 SDValue N1 = Op.getOperand(1);
unsigned N1Opc;
2153 N0 = (N0.getOpcode() != ISD::Constant
2154       ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2155       : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
                            MVT::i16));
2157 N1Opc = N1.getValueType().bitsLT(ShiftVT)
        ? ISD::ZERO_EXTEND
        : ISD::TRUNCATE;
2160 N1 = (N1.getOpcode() != ISD::Constant
2161       ? DAG.getNode(N1Opc, ShiftVT, N1)
2162       : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(), ShiftVT));
2163 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2164 DAG.getNode(Opc, MVT::i16, N0, N1));
}
case ISD::SRA: {
2167 SDValue N1 = Op.getOperand(1);
unsigned N1Opc;
2169 N0 = (N0.getOpcode() != ISD::Constant
2170       ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2171       : DAG.getConstant(cast<ConstantSDNode>(N0)->getSExtValue(),
                            MVT::i16));
2173 N1Opc = N1.getValueType().bitsLT(ShiftVT)
        ? ISD::SIGN_EXTEND
        : ISD::TRUNCATE;
2176 N1 = (N1.getOpcode() != ISD::Constant
2177       ? DAG.getNode(N1Opc, ShiftVT, N1)
2178       : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
                            ShiftVT));
2180 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2181 DAG.getNode(Opc, MVT::i16, N0, N1));
}
case ISD::MUL: {
2184 SDValue N1 = Op.getOperand(1);
unsigned N1Opc;
2186 N0 = (N0.getOpcode() != ISD::Constant
2187       ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2188       : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
                            MVT::i16));
2190 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2191 N1 = (N1.getOpcode() != ISD::Constant
2192 ? DAG.getNode(N1Opc, MVT::i16, N1)
2193       : DAG.getConstant(cast<ConstantSDNode>(N1)->getSExtValue(),
                            MVT::i16));
2195 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2196 DAG.getNode(Opc, MVT::i16, N0, N1));
2204 static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
{
2206 MVT VT = Op.getValueType();
2207 MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2209 SDValue Op0 = Op.getOperand(0);
switch (Opc) {
2212 case ISD::ZERO_EXTEND:
2213 case ISD::ANY_EXTEND: {
2214 MVT Op0VT = Op0.getValueType();
2215 MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2217 assert(Op0VT == MVT::i32
2218 && "CellSPU: Zero/sign extending something other than i32");
2220 DEBUG(cerr << "CellSPU.LowerI64Math: lowering zero/sign/any extend\n");
2222 SDValue PromoteScalar =
2223 DAG.getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0);
2225 // Use a shuffle to zero extend the i32 to i64 directly:
2226 SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, Op0VecVT,
2227 DAG.getConstant(0x80808080, MVT::i32), DAG.getConstant(0x00010203,
2228 MVT::i32), DAG.getConstant(0x80808080, MVT::i32), DAG.getConstant(
2229 0x08090a0b, MVT::i32));
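// (Mask words of 0x80808080 produce zero bytes; 0x00010203 selects the i32
// in the preferred slot, so the preferred i64 lane becomes { 0, Op0 }.)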
2230 SDValue zextShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT, PromoteScalar,
2231 PromoteScalar, shufMask);
2233 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, DAG.getNode(ISD::BIT_CONVERT,
2234 VecVT, zextShuffle));
}
case ISD::ADD: {
2238 // Turn operands into vectors to satisfy type checking (shufb works on
// vectors):
SDValue Op0 =
2241 DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(0));
SDValue Op1 =
2243 DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(1));
2244 SmallVector<SDValue, 16> ShufBytes;
2246 // Create the shuffle mask for "rotating" the carry up one register slot
2247 // once the carry is generated.
2248 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2249 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2250 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2251 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
SDValue CarryGen =
2254 DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
2255 SDValue ShiftedCarry =
2256 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
                CarryGen, CarryGen,
2258 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2259 &ShufBytes[0], ShufBytes.size()));
2261 return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
2262 DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
2263 Op0, Op1, ShiftedCarry));
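// (A 64-bit add is thus a per-word add with carry propagation:
// CARRY_GENERATE computes each 32-bit word's carry-out, the shuffle moves
// the low word's carry into the high word's slot, and ADD_EXTENDED adds it
// back in.)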
}
case ISD::SUB: {
2267 // Turn operands into vectors to satisfy type checking (shufb works on
// vectors):
SDValue Op0 =
2270 DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(0));
SDValue Op1 =
2272 DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(1));
2273 SmallVector<SDValue, 16> ShufBytes;
2275 // Create the shuffle mask for "rotating" the borrow up one register slot
2276 // once the borrow is generated.
2277 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2278 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2279 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2280 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
SDValue BorrowGen =
2283 DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
2284 SDValue ShiftedBorrow =
2285 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2286 BorrowGen, BorrowGen,
2287 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2288 &ShufBytes[0], ShufBytes.size()));
2290 return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
2291 DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
2292 Op0, Op1, ShiftedBorrow));
2299 //! Lower byte immediate operations for v16i8 vectors:
static SDValue
2301 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
SDValue ConstVec;
SDValue Arg;
2304 MVT VT = Op.getValueType();
2306 ConstVec = Op.getOperand(0);
2307 Arg = Op.getOperand(1);
2308 if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2309 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2310 ConstVec = ConstVec.getOperand(0);
} else {
2312 ConstVec = Op.getOperand(1);
2313 Arg = Op.getOperand(0);
2314 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2315 ConstVec = ConstVec.getOperand(0);
}
}
}
2320 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2321 uint64_t VectorBits[2];
2322 uint64_t UndefBits[2];
2323 uint64_t SplatBits, SplatUndef;
int SplatSize;
2326 if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
2327 && isConstantSplat(VectorBits, UndefBits,
2328 VT.getVectorElementType().getSizeInBits(),
2329 SplatBits, SplatUndef, SplatSize)) {
SDValue tcVec[16];
2331 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2332 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2334 // Turn the BUILD_VECTOR into a set of target constants:
2335 for (size_t i = 0; i < tcVecSize; ++i)
tcVec[i] = tc;
2338 return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
2339 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2342 // These operations (AND, OR, XOR) are legal; they just couldn't be custom
2343 // lowered. Return the operation, rather than a null SDValue.
return Op;
}
2347 //! Custom lowering for CTPOP (count population)
/*!
2349 Custom lowering code that counts the number of ones in the input
2350 operand. SPU has such an instruction, but it counts the number of
2351 ones per byte, which then have to be accumulated.
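  Worked example for an i32 input of 0x000F0F0F: CNTB yields the per-byte
  counts 0x00040404; adding the value shifted right by 16 gives 0x00040408,
  adding that shifted right by 8 gives 0x0004080C, and masking with 0xff
  leaves the result, 12.
 */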
2353 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2354 MVT VT = Op.getValueType();
2355 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2357 switch (VT.getSimpleVT()) {
2359 assert(false && "Invalid value type!");
2361 SDValue N = Op.getOperand(0);
2362 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2364 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
2365 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2367 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
}
case MVT::i16: {
2371 MachineFunction &MF = DAG.getMachineFunction();
2372 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2374 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2376 SDValue N = Op.getOperand(0);
2377 SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2378 SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2379 SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2381 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
2382 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2384 // CNTB_result becomes the chain to which all of the virtual registers
2385 // CNTB_reg, SUM1_reg become associated:
2386 SDValue CNTB_result =
2387 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2389 SDValue CNTB_rescopy =
2390 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2392 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2394 return DAG.getNode(ISD::AND, MVT::i16,
2395 DAG.getNode(ISD::ADD, MVT::i16,
2396 DAG.getNode(ISD::SRL, MVT::i16,
                Tmp1, Shift1),
            Tmp1),
        Mask0);
}
case MVT::i32: {
2403 MachineFunction &MF = DAG.getMachineFunction();
2404 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2406 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2407 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2409 SDValue N = Op.getOperand(0);
2410 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2411 SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2412 SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2413 SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2415 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
2416 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2418 // CNTB_result becomes the chain to which all of the virtual registers
2419 // CNTB_reg, SUM1_reg become associated:
2420 SDValue CNTB_result =
2421 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2423 SDValue CNTB_rescopy =
2424 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
SDValue Comp1 =
2427 DAG.getNode(ISD::SRL, MVT::i32,
2428 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
SDValue Sum1 =
2431 DAG.getNode(ISD::ADD, MVT::i32,
2432 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2434 SDValue Sum1_rescopy =
2435 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
SDValue Comp2 =
2438 DAG.getNode(ISD::SRL, MVT::i32,
2439 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
                Shift2);
SDValue Sum2 =
2442 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2443 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2445 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2455 //! Lower ISD::SETCC
/*!
2457 Lower i64 condition-code handling.
 */
2460 static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) {
2461 MVT VT = Op.getValueType();
2462 SDValue lhs = Op.getOperand(0);
2463 SDValue rhs = Op.getOperand(1);
2464 SDValue condition = Op.getOperand(2);
2466 if (VT == MVT::i32 && lhs.getValueType() == MVT::i64) {
2467 // Expand the i64 comparisons to what Cell can actually support,
2468 // which is eq, ugt and sgt:
2470 CondCodeSDNode *ccvalue = dyn_cast<CondCodeSDNode>(condition);
2472 switch (ccvalue->get()) {
2481 //! Lower ISD::SELECT_CC
/*!
2483 ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
2484 SELB instruction.
2486 \note Need to revisit this in the future: if the code path through the true
2487 and false value computations is longer than the latency of a branch (6
2488 cycles), then it would be more advantageous to branch and insert a new basic
2489 block and branch on the condition. However, this code does not make that
2490 assumption, given the simplistic uses so far.
 */
2493 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
2494 const TargetLowering &TLI) {
2495 MVT VT = Op.getValueType();
2496 SDValue lhs = Op.getOperand(0);
2497 SDValue rhs = Op.getOperand(1);
2498 SDValue trueval = Op.getOperand(2);
2499 SDValue falseval = Op.getOperand(3);
2500 SDValue condition = Op.getOperand(4);
2502 // NOTE: SELB's arguments: $rA, $rB, $mask
2504 // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
2505 // where bits in $mask are 1. CCond will be inverted, having 1s where the
2506 // condition was true and 0s where the condition was false. Hence, the
2507 // arguments to SELB get reversed.
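//
// (Bitwise, SELB computes $rT = ($rA & ~$mask) | ($rB & $mask).)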
2509 // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2510 // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2511 // with another "cannot select select_cc" assert:
2513 SDValue compare = DAG.getNode(ISD::SETCC, TLI.getSetCCResultType(Op),
2514 lhs, rhs, condition);
2515 return DAG.getNode(SPUISD::SELB, VT, falseval, trueval, compare);
2518 //! Custom lower ISD::TRUNCATE
2519 static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
{
2521 MVT VT = Op.getValueType();
2522 MVT::SimpleValueType simpleVT = VT.getSimpleVT();
2523 MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2525 SDValue Op0 = Op.getOperand(0);
2526 MVT Op0VT = Op0.getValueType();
2527 MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2529 // Create shuffle mask
2530 if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
2531 // least significant doubleword of quadword
2532 unsigned maskHigh = 0x08090a0b;
2533 unsigned maskLow = 0x0c0d0e0f;
2534 // Use a shuffle to perform the truncation
2535 SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2536 DAG.getConstant(maskHigh, MVT::i32),
2537 DAG.getConstant(maskLow, MVT::i32),
2538 DAG.getConstant(maskHigh, MVT::i32),
2539 DAG.getConstant(maskLow, MVT::i32));
2542 SDValue PromoteScalar = DAG.getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0);
2544 SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT,
2545 PromoteScalar, PromoteScalar, shufMask);
2547 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2548 DAG.getNode(ISD::BIT_CONVERT, VecVT, truncShuffle));
}

2551 return SDValue(); // Leave the truncate unmolested
}
2554 //! Custom (target-specific) lowering entry point
/*!
2556 This is where LLVM's DAG selection process calls to do target-specific
  lowering of nodes.
 */
SDValue
2560 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
{
2562 unsigned Opc = (unsigned) Op.getOpcode();
2563 MVT VT = Op.getValueType();
2567 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2568 cerr << "Op.getOpcode() = " << Opc << "\n";
2569 cerr << "*Op.getNode():\n";
2570 Op.getNode()->dump();
abort();
}
case ISD::LOAD:
2577 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
case ISD::STORE:
2579 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2580 case ISD::ConstantPool:
2581 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2582 case ISD::GlobalAddress:
2583 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2584 case ISD::JumpTable:
2585 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
case ISD::Constant:
2587 return LowerConstant(Op, DAG);
2588 case ISD::ConstantFP:
2589 return LowerConstantFP(Op, DAG);
case ISD::BRCOND:
2591 return LowerBRCOND(Op, DAG, *this);
2592 case ISD::FORMAL_ARGUMENTS:
2593 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
case ISD::CALL:
2595 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
case ISD::RET:
2597 return LowerRET(Op, DAG, getTargetMachine());
2600 case ISD::ZERO_EXTEND:
2601 case ISD::ANY_EXTEND:
2602 return LowerI64Math(Op, DAG, Opc);
2604 // i8, i64 math ops:
case ISD::ADD:
case ISD::SUB:
case ISD::ROTR:
case ISD::ROTL:
case ISD::SRL:
case ISD::SHL:
case ISD::SRA:
if (VT == MVT::i8)
2613 return LowerI8Math(Op, DAG, Opc, *this);
2614 else if (VT == MVT::i64)
2615 return LowerI64Math(Op, DAG, Opc);
2619 // Vector-related lowering.
2620 case ISD::BUILD_VECTOR:
2621 return LowerBUILD_VECTOR(Op, DAG);
2622 case ISD::SCALAR_TO_VECTOR:
2623 return LowerSCALAR_TO_VECTOR(Op, DAG);
2624 case ISD::VECTOR_SHUFFLE:
2625 return LowerVECTOR_SHUFFLE(Op, DAG);
2626 case ISD::EXTRACT_VECTOR_ELT:
2627 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2628 case ISD::INSERT_VECTOR_ELT:
2629 return LowerINSERT_VECTOR_ELT(Op, DAG);
2631 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
case ISD::AND:
case ISD::OR:
case ISD::XOR:
2635 return LowerByteImmed(Op, DAG);
2637 // Vector and i8 multiply:
case ISD::MUL:
if (VT == MVT::i8)
2640 return LowerI8Math(Op, DAG, Opc, *this);
case ISD::CTPOP:
2643 return LowerCTPOP(Op, DAG);
2645 case ISD::SELECT_CC:
2646 return LowerSELECT_CC(Op, DAG, *this);
case ISD::TRUNCATE:
2649 return LowerTRUNCATE(Op, DAG);
case ISD::SETCC:
2652 return LowerSETCC(Op, DAG);
}

return SDValue();
}
2658 void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
2659                                            SmallVectorImpl<SDValue> &Results,
                                               SelectionDAG &DAG)
{
2663 unsigned Opc = (unsigned) N->getOpcode();
2664 MVT OpVT = N->getValueType(0);
2668 cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2669 cerr << "Op.getOpcode() = " << Opc << "\n";
2670 cerr << "*Op.getNode():\n";
2678 /* Otherwise, return unchanged */
2681 //===----------------------------------------------------------------------===//
2682 // Target Optimization Hooks
2683 //===----------------------------------------------------------------------===//
SDValue
2686 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
{
2689 TargetMachine &TM = getTargetMachine();
2691 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2692 SelectionDAG &DAG = DCI.DAG;
2693 SDValue Op0 = N->getOperand(0); // everything has at least one operand
2694 MVT NodeVT = N->getValueType(0); // The node's value type
2695 MVT Op0VT = Op0.getValueType(); // The first operand's result
2696 SDValue Result; // Initially, empty result
2698 switch (N->getOpcode()) {
default: break;
case ISD::ADD: {
2701 SDValue Op1 = N->getOperand(1);
2703 if (Op0.getOpcode() == SPUISD::IndirectAddr
2704 || Op1.getOpcode() == SPUISD::IndirectAddr) {
2705 // Normalize the operands to reduce repeated code
2706 SDValue IndirectArg = Op0, AddArg = Op1;
2708 if (Op1.getOpcode() == SPUISD::IndirectAddr) {
IndirectArg = Op1;
AddArg = Op0;
}
2713 if (isa<ConstantSDNode>(AddArg)) {
2714 ConstantSDNode *CN0 = cast<ConstantSDNode>(AddArg);
2715 SDValue IndOp1 = IndirectArg.getOperand(1);
2717 if (CN0->isNullValue()) {
2718 // (add (SPUindirect <arg>, <arg>), 0) ->
2719 // (SPUindirect <arg>, <arg>)
2721 #if !defined(NDEBUG)
2722 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2724 << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
2725 << "With: (SPUindirect <arg>, <arg>)\n";
2730 } else if (isa<ConstantSDNode>(IndOp1)) {
2731 // (add (SPUindirect <arg>, <const>), <const>) ->
2732 // (SPUindirect <arg>, <const + const>)
2733 ConstantSDNode *CN1 = cast<ConstantSDNode>(IndOp1);
2734 int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
2735 SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
2737 #if !defined(NDEBUG)
2738 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2740 << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
2741 << "), " << CN0->getSExtValue() << ")\n"
2742 << "With: (SPUindirect <arg>, "
2743 << combinedConst << ")\n";
2747 return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
2748 IndirectArg, combinedValue);
2754 case ISD::SIGN_EXTEND:
2755 case ISD::ZERO_EXTEND:
2756 case ISD::ANY_EXTEND: {
2757 if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
2758 // (any_extend (SPUextract_elt0 <arg>)) ->
2759 // (SPUextract_elt0 <arg>)
2760 // Types must match, however...
2761 #if !defined(NDEBUG)
2762 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2763 cerr << "\nReplace: ";
2766 Op0.getNode()->dump(&DAG);
2775 case SPUISD::IndirectAddr: {
2776 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2777 ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
2778 if (CN->getZExtValue() == 0) {
2779 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2780 // (SPUaform <addr>, 0)
2782 DEBUG(cerr << "Replace: ");
2783 DEBUG(N->dump(&DAG));
2784 DEBUG(cerr << "\nWith: ");
2785 DEBUG(Op0.getNode()->dump(&DAG));
2786 DEBUG(cerr << "\n");
2790 } else if (Op0.getOpcode() == ISD::ADD) {
2791 SDValue Op1 = N->getOperand(1);
2792 if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
2793 // (SPUindirect (add <arg>, <arg>), 0) ->
2794 // (SPUindirect <arg>, <arg>)
2795 if (CN1->isNullValue()) {
2797 #if !defined(NDEBUG)
2798 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2800 << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
2801 << "With: (SPUindirect <arg>, <arg>)\n";
2805 return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
2806 Op0.getOperand(0), Op0.getOperand(1));
2812 case SPUISD::SHLQUAD_L_BITS:
2813 case SPUISD::SHLQUAD_L_BYTES:
2814 case SPUISD::VEC_SHL:
2815 case SPUISD::VEC_SRL:
2816 case SPUISD::VEC_SRA:
2817 case SPUISD::ROTBYTES_LEFT: {
2818 SDValue Op1 = N->getOperand(1);
2820 // Kill degenerate vector shifts:
2821 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
2822 if (CN->isNullValue()) {
Result = Op0;
}
}
break;
2828 case SPUISD::PREFSLOT2VEC: {
2829 switch (Op0.getOpcode()) {
default:
break;
2832 case ISD::ANY_EXTEND:
2833 case ISD::ZERO_EXTEND:
2834 case ISD::SIGN_EXTEND: {
2835 // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
// <arg>
2837 // but only if the SPUprefslot2vec and <arg> types match.
2838 SDValue Op00 = Op0.getOperand(0);
2839 if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
2840 SDValue Op000 = Op00.getOperand(0);
2841 if (Op000.getValueType() == NodeVT) {
Result = Op000;
}
}
break;
}
2847 case SPUISD::VEC2PREFSLOT: {
2848 // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
// <arg>
2850 Result = Op0.getOperand(0);
break;
}
}
2857 // Otherwise, return unchanged.
2859 if (Result.getNode()) {
2860 DEBUG(cerr << "\nReplace.SPU: ");
2861 DEBUG(N->dump(&DAG));
2862 DEBUG(cerr << "\nWith: ");
2863 DEBUG(Result.getNode()->dump(&DAG));
2864 DEBUG(cerr << "\n");
2871 //===----------------------------------------------------------------------===//
2872 // Inline Assembly Support
2873 //===----------------------------------------------------------------------===//
2875 /// getConstraintType - Given a constraint letter, return the type of
2876 /// constraint it is for this target.
2877 SPUTargetLowering::ConstraintType
2878 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2879 if (ConstraintLetter.size() == 1) {
2880 switch (ConstraintLetter[0]) {
default: break;
case 'b':
case 'r':
case 'f':
case 'v':
case 'y':
2887 return C_RegisterClass;
}
}
2890 return TargetLowering::getConstraintType(ConstraintLetter);
2893 std::pair<unsigned, const TargetRegisterClass*>
2894 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2897 if (Constraint.size() == 1) {
2898 // GCC RS6000 Constraint Letters
2899 switch (Constraint[0]) {
case 'b':   // R1-R31
case 'r':   // R0-R31
if (VT == MVT::i64)
2903 return std::make_pair(0U, SPU::R64CRegisterClass);
2904 return std::make_pair(0U, SPU::R32CRegisterClass);
case 'f':
if (VT == MVT::f32)
2907 return std::make_pair(0U, SPU::R32FPRegisterClass);
2908 else if (VT == MVT::f64)
2909 return std::make_pair(0U, SPU::R64FPRegisterClass);
break;
case 'v':
2912 return std::make_pair(0U, SPU::GPRCRegisterClass);
}
}

2916 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2919 //! Compute used/known bits for a SPU operand
void
2921 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
                                                    const APInt &Mask,
                                                    APInt &KnownZero,
                                                    APInt &KnownOne,
2925                                                 const SelectionDAG &DAG,
2926                                                 unsigned Depth) const {
2928 const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
2931 switch (Op.getOpcode()) {
default:
2933 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
break;
2943 case SPUISD::PREFSLOT2VEC: {
2944 SDValue Op0 = Op.getOperand(0);
2945 MVT Op0VT = Op0.getValueType();
2946 unsigned Op0VTBits = Op0VT.getSizeInBits();
2947 uint64_t InMask = Op0VT.getIntegerVTBitMask();
2948 KnownZero |= APInt(Op0VTBits, ~InMask, false);
2949 KnownOne |= APInt(Op0VTBits, InMask, false);
2953 case SPUISD::LDRESULT:
2954 case SPUISD::VEC2PREFSLOT: {
2955 MVT OpVT = Op.getValueType();
2956 unsigned OpVTBits = OpVT.getSizeInBits();
2957 uint64_t InMask = OpVT.getIntegerVTBitMask();
2958 KnownZero |= APInt(OpVTBits, ~InMask, false);
2959 KnownOne |= APInt(OpVTBits, InMask, false);
2964 case SPUISD::SHLQUAD_L_BITS:
2965 case SPUISD::SHLQUAD_L_BYTES:
2966 case SPUISD::VEC_SHL:
2967 case SPUISD::VEC_SRL:
2968 case SPUISD::VEC_SRA:
2969 case SPUISD::VEC_ROTL:
2970 case SPUISD::VEC_ROTR:
2971 case SPUISD::ROTBYTES_LEFT:
2972 case SPUISD::SELECT_MASK:
2974 case SPUISD::SEXT32TO64:
unsigned
2980 SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
2981                                                    unsigned Depth) const {
2982 switch (Op.getOpcode()) {
default:
return 1;
case ISD::SETCC: {
2987 MVT VT = Op.getValueType();
2989 if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
VT = MVT::i32;
}
2992 return VT.getSizeInBits();
}
}
}
2997 // LowerAsmOperandForConstraint
void
2999 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3000                                                 char ConstraintLetter,
                                                    bool hasMemory,
3002 std::vector<SDValue> &Ops,
3003 SelectionDAG &DAG) const {
3004 // Default, for the time being, to the base class handler
3005 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
                                                 Ops, DAG);
}
3009 /// isLegalAddressImmediate - Return true if the integer value can be used
3010 /// as the offset of the target addressing mode.
3011 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3012 const Type *Ty) const {
3013 // SPU addresses are within the 256K local store (18-bit immediates):
3014 return (V > -(1 << 18) && V < (1 << 18) - 1);
3017 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
return false;
}

bool
3022 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3023 // The SPU target isn't yet aware of offsets.
return false;
}