//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//
#include "SPURegisterNames.h"
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "SPUFrameInfo.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/CallingConv.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"

#include <map>

using namespace llvm;

// Used in getTargetNodeName() below
namespace {
  std::map<unsigned, const char *> node_names;
  //! MVT mapping to useful data for Cell SPU
  struct valtype_map_s {
    const MVT   valtype;
    const int   prefslot_byte;
  };

  // Preferred-slot byte offsets within the 16-byte register line
  // (reconstructed mapping: 32- and 64-bit quantities sit at byte 0,
  // i16 at byte 2, i8/i1 at byte 3):
  const valtype_map_s valtype_map[] = {
    { MVT::i1,   3 },
    { MVT::i8,   3 },
    { MVT::i16,  2 },
    { MVT::i32,  0 },
    { MVT::f32,  0 },
    { MVT::i64,  0 },
    { MVT::f64,  0 },
    { MVT::i128, 0 }
  };

  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
  //! Look up an MVT in valtype_map, returning 0 if it is not present.
  const valtype_map_s *getValueTypeMapEntry(MVT VT) {
    const valtype_map_s *retval = 0;

    for (size_t i = 0; i < n_valtype_map; ++i) {
      if (valtype_map[i].valtype == VT) {
        retval = valtype_map + i;
        break;
      }
    }

#ifndef NDEBUG
    if (retval == 0) {
      cerr << "getValueTypeMapEntry returns NULL for "
           << VT.getMVTString()
           << "\n";
      abort();
    }
#endif

    return retval;
  }
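
  // Illustrative example (hedged, based on the reconstructed table above):
  // getValueTypeMapEntry(MVT::i16)->prefslot_byte is 2, because a halfword's
  // preferred slot is bytes 2-3 of the 16-byte register line, while i32/f32
  // and the 64-bit types live at byte 0 and i8 at byte 3.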

  //! Expand a library call into an actual call DAG node
  /*!
   \note
   This code is taken from SelectionDAGLegalize, since it is not exposed as
   part of the LLVM SelectionDAG API.
   */
  SDValue
  ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
                bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) {
    // The input chain to this libcall is the entry node of the function.
    // Legalizing the call will automatically add the previous call to the
    // dependence.
    SDValue InChain = DAG.getEntryNode();

    TargetLowering::ArgListTy Args;
    TargetLowering::ArgListEntry Entry;
    for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
      MVT ArgVT = Op.getOperand(i).getValueType();
      const Type *ArgTy = ArgVT.getTypeForMVT();
      Entry.Node = Op.getOperand(i);
      Entry.Ty = ArgTy;
      Entry.isSExt = isSigned;
      Entry.isZExt = !isSigned;
      Args.push_back(Entry);
    }
    SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
                                           TLI.getPointerTy());

    // Splice the libcall in wherever FindInputOutputChains tells us to.
    const Type *RetTy = Op.getNode()->getValueType(0).getTypeForMVT();
    std::pair<SDValue, SDValue> CallInfo =
            TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
                            CallingConv::C, false, Callee, Args, DAG,
                            Op.getDebugLoc());

    return CallInfo.first;
  }
}
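
// Usage sketch (illustrative, not from the original file): a custom f64
// lowering inside SPUTargetLowering could expand to the libcall name that
// the constructor registers below, e.g.
//
//   SDValue Dummy;
//   SDValue Quot = ExpandLibCall(RTLIB::DIV_F64, Op, DAG, false, Dummy, *this);
//
// which builds the complete call sequence via TLI.LowerCallTo and returns
// the call's result value.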

SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM),
    SPUTM(TM)
{
  // Fold away setcc operations if possible.
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set RTLIB libcall names as used by SPU:
  setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");

  // Set up the SPU's register classes:
  addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);

  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::LOAD,   VT, Custom);
    setOperationAction(ISD::STORE,  VT, Custom);
    setLoadExtAction(ISD::EXTLOAD,  VT, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
      MVT StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType) sctype;

    setOperationAction(ISD::LOAD,  VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
      MVT StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);

  // Custom lower SELECT_CC for most cases, but expand by default
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i8,    Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i16,   Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32,   Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i64,   Custom);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);

  // SPU has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);

  // Expand fsqrt to the appropriate libcall (NOTE: should use the hardware
  // fsqrt for f32 once it is wired up in the .td files)
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.

  // FIXME: Change from "expand" to appropriate type once ROTR is supported in
  // the .td files.
  setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i8,  Expand /*Custom*/);

  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8,  Custom);

  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);

  // Make these operations legal and handle them during instruction selection:
  setOperationAction(ISD::SHL, MVT::i64, Legal);
  setOperationAction(ISD::SRL, MVT::i64, Legal);
  setOperationAction(ISD::SRA, MVT::i64, Legal);

  // Custom lower i8 multiplications; i32 and i64 multiplies are legal and
  // handled during instruction selection:
  setOperationAction(ISD::MUL, MVT::i8,  Custom);
  setOperationAction(ISD::MUL, MVT::i32, Legal);
  setOperationAction(ISD::MUL, MVT::i64, Legal);

  // Expand double-width multiplication
  // FIXME: It would probably be reasonable to support some of these operations
  setOperationAction(ISD::UMUL_LOHI, MVT::i8,  Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i8,  Expand);
  setOperationAction(ISD::MULHU,     MVT::i8,  Expand);
  setOperationAction(ISD::MULHS,     MVT::i8,  Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::MULHU,     MVT::i16, Expand);
  setOperationAction(ISD::MULHS,     MVT::i16, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::MULHU,     MVT::i32, Expand);
  setOperationAction(ISD::MULHS,     MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::MULHU,     MVT::i64, Expand);
  setOperationAction(ISD::MULHS,     MVT::i64, Expand);

  // Need to custom handle (some) common i8, i64 math ops
  setOperationAction(ISD::ADD, MVT::i8,  Custom);
  setOperationAction(ISD::ADD, MVT::i64, Legal);
  setOperationAction(ISD::SUB, MVT::i8,  Custom);
  setOperationAction(ISD::SUB, MVT::i64, Legal);

  // SPU does not have BSWAP, but it does support CTLZ for i32. CTPOP has to
  // be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8,  Custom);
  setOperationAction(ISD::CTPOP, MVT::i16, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i64, Custom);

  setOperationAction(ISD::CTTZ, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i64, Expand);

  setOperationAction(ISD::CTLZ, MVT::i32, Legal);

  // SPU has a version of select that implements (a&~c)|(b&c), just like
  // select ought to work:
  setOperationAction(ISD::SELECT, MVT::i8,  Legal);
  setOperationAction(ISD::SELECT, MVT::i16, Legal);
  setOperationAction(ISD::SELECT, MVT::i32, Legal);
  setOperationAction(ISD::SELECT, MVT::i64, Legal);

  setOperationAction(ISD::SETCC, MVT::i8,  Legal);
  setOperationAction(ISD::SETCC, MVT::i16, Legal);
  setOperationAction(ISD::SETCC, MVT::i32, Legal);
  setOperationAction(ISD::SETCC, MVT::i64, Legal);
  setOperationAction(ISD::SETCC, MVT::f64, Custom);

  // Custom lower i128 -> i64 truncates
  setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);

  // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
  // to expand to a libcall, hence the custom lowering:
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall

  // SPU has [U|S]INT_TO_FP for i32->f32; the i64 and f64 conversions are
  // custom lowered, and the narrower integer types are promoted:
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC,     MVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::ConstantPool,  VT, Custom);
    setOperationAction(ISD::JumpTable,     VT, Custom);
  }

  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG,              MVT::Other, Expand);
  setOperationAction(ISD::VACOPY,             MVT::Other, Expand);
  setOperationAction(ISD::VAEND,              MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32,   Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64,   Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  // "Odd size" vector classes that we're willing to support:
  addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);

  for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT VT = (MVT::SimpleValueType)i;

    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD, VT, Legal);
    setOperationAction(ISD::SUB, VT, Legal);
    // mul is also legal for the supported vector types; it is matched
    // during instruction selection.
    setOperationAction(ISD::MUL, VT, Legal);

    setOperationAction(ISD::AND,    VT, Legal);
    setOperationAction(ISD::OR,     VT, Legal);
    setOperationAction(ISD::XOR,    VT, Legal);
    setOperationAction(ISD::LOAD,   VT, Legal);
    setOperationAction(ISD::SELECT, VT, Legal);
    setOperationAction(ISD::STORE,  VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);
    setOperationAction(ISD::ConstantPool,       VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,   VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);
  }

  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR,  MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setOperationAction(ISD::FDIV, MVT::v4f32, Legal);

  setShiftAmountType(MVT::i32);
  setBooleanContents(ZeroOrNegativeOneBooleanContent);

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  computeRegisterProperties();

  // Set pre-RA register scheduler default to BURR, which produces slightly
  // better code than the default (could also be TDRR, but TargetLowering.h
  // needs a mod to support that model):
  setSchedulingPreference(SchedulingForRegPressure);
}

const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
    node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
            "SPUISD::ROTBYTES_LEFT_BITS";
    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
    node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
    node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
}

/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const {
  return 3;
}

//===----------------------------------------------------------------------===//
// Return the Cell SPU's SETCC result type
//===----------------------------------------------------------------------===//

MVT SPUTargetLowering::getSetCCResultType(MVT VT) const {
  // i8, i16 and i32 are valid SETCC result types
  return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ? VT : MVT::i32);
}
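
// Illustrative: a setcc on i16 operands therefore yields an i16 mask of all
// ones or all zeros (per ZeroOrNegativeOneBooleanContent, set in the
// constructor), while f64 comparisons fall back to the default MVT::i32.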

//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
//  LowerOperation implementation
//===----------------------------------------------------------------------===//

/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to rotate to extract the requested element.

 For extending loads, we also want to ensure that the following sequence is
 emitted, e.g. for MVT::f32 extending load to MVT::f64:

\verbatim
%1  v16i8,ch = load
%2  v16i8,ch = rotate %1
%3  v4f32,ch = bitconvert %2
%4  f32      = vec2prefslot %3
%5  f64      = fp_extend %4
\endverbatim
*/
static SDValue
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDValue the_chain = LN->getChain();
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  MVT InVT = LN->getMemoryVT();
  MVT OutVT = Op.getValueType();
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
  DebugLoc dl = Op.getDebugLoc();

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDValue result;
    SDValue basePtr = LN->getBasePtr();
    SDValue rotate;

    if (alignment == 16) {
      ConstantSDNode *CN;

      // Special cases for a known aligned load to simplify the base pointer
      // and the rotation amount:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();
        int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);

        if (rotamt < 0)
          rotamt += 16;

        rotate = DAG.getConstant(rotamt, MVT::i16);
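
        // Worked example (illustrative): an aligned i32 load from
        // (add %base, 4) has prefslot_byte == 0, so rotamt = (4 & 0xf) - 0
        // = 4; rotating the quadword left by 4 bytes brings the requested
        // word into the preferred slot. An i16 load at offset 1 gives
        // rotamt = 1 - 2 = -1, which wraps to 15 via the adjustment above.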

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        if ((offset & ~0xf) > 0) {
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                basePtr,
                                DAG.getConstant((offset & ~0xf), PtrVT));
        }
      } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
                 || (basePtr.getOpcode() == SPUISD::IndirectAddr
                     && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
                     && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
        // Plain aligned a-form address: rotate into preferred slot
        // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
        int64_t rotamt = -vtm->prefslot_byte;
        if (rotamt < 0)
          rotamt += 16;
        rotate = DAG.getConstant(rotamt, MVT::i16);
      } else {
        // Offset the rotate amount by the basePtr and the preferred slot
        // byte offset
        int64_t rotamt = -vtm->prefslot_byte;
        if (rotamt < 0)
          rotamt += 16;
        rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                             basePtr,
                             DAG.getConstant(rotamt, PtrVT));
      }
    } else {
      // Unaligned load: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        SDValue Flag;

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
        } else {
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        }
      } else {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }

      // Offset the rotate amount by the basePtr and the preferred slot
      // byte offset
      rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                           basePtr,
                           DAG.getConstant(-vtm->prefslot_byte, PtrVT));
    }

    // Re-emit as a v16i8 vector load
    result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
                         LN->getSrcValue(), LN->getSrcValueOffset(),
                         LN->isVolatile(), 16);

    // Update the chain
    the_chain = result.getValue(1);

    // Rotate into the preferred slot:
    result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8,
                         result.getValue(0), rotate);

    // Convert the loaded v16i8 vector to the appropriate vector type
    // specified by the operand:
    MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
    result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
                         DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));

    // Handle extending loads by extending the scalar result:
    if (ExtType == ISD::SEXTLOAD) {
      result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
    } else if (ExtType == ISD::ZEXTLOAD) {
      result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
    } else if (ExtType == ISD::EXTLOAD) {
      unsigned NewOpc = ISD::ANY_EXTEND;

      if (OutVT.isFloatingPoint())
        NewOpc = ISD::FP_EXTEND;

      result = DAG.getNode(NewOpc, dl, OutVT, result);
    }

    SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
    SDValue retops[2] = {
      result,
      the_chain
    };

    result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
                         retops, sizeof(retops) / sizeof(retops[0]));
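
    // Note (illustrative): LDRESULT packages the rotated/converted scalar
    // together with the load's output chain, so e.g. an f32 -> f64 extending
    // load ultimately looks like
    //   (fp_extend (vec2prefslot (bitconvert (rotate (load ...)))))
    // with both the value and the chain returned to the caller.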
    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) LN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}

/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
*/
static SDValue
LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDValue Value = SN->getValue();
  MVT VT = Value.getValueType();
  MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  unsigned alignment = SN->getAlignment();

  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    // The vector type we really want to load from the 16-byte chunk.
    MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
        stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));

    SDValue alignLoadVec;
    SDValue basePtr = SN->getBasePtr();
    SDValue the_chain = SN->getChain();
    SDValue insertEltOffs;

    if (alignment == 16) {
      ConstantSDNode *CN;

      // Special cases for a known aligned load to simplify the base pointer
      // and insertion byte:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                    basePtr,
                                    DAG.getConstant((offset & 0xf), PtrVT));

        if ((offset & ~0xf) > 0) {
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                basePtr,
                                DAG.getConstant((offset & ~0xf), PtrVT));
        }
      } else {
        // Otherwise, assume it's at byte 0 of basePtr
        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                    basePtr,
                                    DAG.getConstant(0, PtrVT));
      }
    } else {
      // Unaligned load: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        SDValue Flag;

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
        } else {
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        }
      } else {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }

      // Insertion point is solely determined by basePtr's contents
      insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
                                  basePtr,
                                  DAG.getConstant(0, PtrVT));
    }

    // Re-emit as a v16i8 vector load
    alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
                               SN->getSrcValue(), SN->getSrcValueOffset(),
                               SN->isVolatile(), 16);

    // Update the chain
    the_chain = alignLoadVec.getValue(1);

    LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
    SDValue theValue = SN->getValue();
    SDValue result;

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }

    // If the base pointer is already a D-form address, then just create
    // a new D-form address with a slot offset and the original base pointer.
    // Otherwise generate a D-form address with the slot offset relative
    // to the stack pointer, which is always aligned.
#if !defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      cerr << "CellSPU LowerSTORE: basePtr = ";
      basePtr.getNode()->dump(&DAG);
      cerr << "\n";
    }
#endif

    SDValue insertEltOp =
            DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
    SDValue vectorizeOp =
            DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);

    result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
                         vectorizeOp, alignLoadVec,
                         DAG.getNode(ISD::BIT_CONVERT, dl,
                                     MVT::v4i32, insertEltOp));

    result = DAG.getStore(the_chain, dl, result, basePtr,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->getAlignment());
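
    // Illustrative flow: storing an i32 at offset 4 of an aligned quadword
    // loads the surrounding v16i8 block, builds a SHUFFLE_MASK from the
    // insertion offset, SHUFBs the scalar (splatted via SCALAR_TO_VECTOR)
    // into bytes 4-7 of the loaded block, and stores the merged 16 bytes
    // back.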

#if 0 && !defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      const SDValue &currentRoot = DAG.getRoot();

      DAG.setRoot(result);
      cerr << "------- CellSPU:LowerStore result:\n";
      DAG.dump();
      cerr << "-------\n";
      DAG.setRoot(currentRoot);
    }
#endif

    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) SN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}

//! Generate the address of a constant pool entry.
static SDValue
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDValue with the constant pool address in it.
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  assert(0 &&
         "LowerConstantPool: Relocation model other than static"
         " not supported.");
  return SDValue();
}

//! Alternate entry point for generating the address of a constant pool entry
SDValue
SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG,
                       const SPUTargetMachine &TM) {
  return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
}

static SDValue
LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  assert(0 &&
         "LowerJumpTable: Relocation model other than static not supported.");
  return SDValue();
}

static SDValue
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  const TargetMachine &TM = DAG.getTarget();
  SDValue Zero = DAG.getConstant(0, PtrVT);
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  } else {
    cerr << "LowerGlobalAddress: Relocation model other than static not "
         << "supported.\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}

//! Custom lower double precision floating point constants
static SDValue
LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (VT == MVT::f64) {
    ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());

    assert((FP != 0) &&
           "LowerConstantFP: Node is not ConstantFPSDNode");

    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
    SDValue T = DAG.getConstant(dbits, MVT::i64);
    SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
    return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
                       DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Tvec));
  }

  return SDValue();
}

static SDValue
LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
{
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  SmallVector<SDValue, 48> ArgValues;
  SDValue Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
  DebugLoc dl = Op.getDebugLoc();

  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Add DAG nodes to load the arguments or copy them out of registers.
  for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
       ArgNo != e; ++ArgNo) {
    MVT ObjectVT = Op.getValue(ArgNo).getValueType();
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    SDValue ArgVal;

    if (ArgRegIdx < NumArgRegs) {
      const TargetRegisterClass *ArgRegClass;

      switch (ObjectVT.getSimpleVT()) {
      default:
        cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
             << ObjectVT.getMVTString()
             << "\n";
        abort();
      case MVT::i8:
        ArgRegClass = &SPU::R8CRegClass;
        break;
      case MVT::i16:
        ArgRegClass = &SPU::R16CRegClass;
        break;
      case MVT::i32:
        ArgRegClass = &SPU::R32CRegClass;
        break;
      case MVT::i64:
        ArgRegClass = &SPU::R64CRegClass;
        break;
      case MVT::i128:
        ArgRegClass = &SPU::GPRCRegClass;
        break;
      case MVT::f32:
        ArgRegClass = &SPU::R32FPRegClass;
        break;
      case MVT::f64:
        ArgRegClass = &SPU::R64FPRegClass;
        break;
      case MVT::v2f64:
      case MVT::v4f32:
      case MVT::v2i64:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        ArgRegClass = &SPU::VECREGRegClass;
        break;
      }

      unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
      RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
      ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT);
      ++ArgRegIdx;
    } else {
      // We need to load the argument to a virtual register if we determined
      // above that we ran out of physical registers of the appropriate type
      // or we're forced to do vararg
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Root, FIN, NULL, 0);
      ArgOffset += StackSlotSize;
    }

    ArgValues.push_back(ArgVal);
    // Update the chain
    Root = ArgVal.getOperand(0);
  }

  // vararg handling:
  if (isVarArg) {
    // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
    // We will spill (79-3)+1 registers to the stack
    SmallVector<SDValue, 79-3+1> MemOps;

    // Create the frame slots
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
      SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
      SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
      SDValue Store = DAG.getStore(Root, dl, ArgVal, FIN, NULL, 0);
      Root = Store.getOperand(0);
      MemOps.push_back(Store);

      // Increment address by stack slot size for the next stored argument
      ArgOffset += StackSlotSize;
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                         &MemOps[0], MemOps.size());
  }

  ArgValues.push_back(Root);

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
                     &ArgValues[0], ArgValues.size());
}

/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||     // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;                // Top 14 bits have to be sext of immediate.

  return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
}
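
// Worked example (illustrative): Addr = 0x12340 has its low 2 bits clear and
// survives the sign-extension round trip (it fits in a signed 18-bit
// immediate), so the returned node holds 0x12340 >> 2 = 0x48d0. Addr = 0x12341
// fails the low-bit test and yields 0.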

static SDValue
LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
  SDValue Chain = TheCall->getChain();
  SDValue Callee = TheCall->getCallee();
  unsigned NumOps = TheCall->getNumArgs();
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
  DebugLoc dl = TheCall->getDebugLoc();

  // Handy pointer type
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area. According to the SPU ABI,
  // we minimally need space for [LR] and [SP].
  unsigned NumStackBytes = SPUFrameInfo::minStackSize();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDValue, 8> MemOpChains;

  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = TheCall->getArg(i);

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType().getSimpleVT()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::v2i64:
    case MVT::v2f64:
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }

  // Update number of stack bytes actually used, insert a call sequence start
  NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
                                                            true));

  if (!MemOpChains.empty()) {
    // Adjust the stack pointer for the stack arguments.
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    GlobalValue *GV = G->getGlobal();
    MVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);

    if (!ST->usingLargeMem()) {
      // Turn calls to targets that are defined (i.e., have bodies) into BRSL
      // style calls; otherwise, external symbols are BRASL calls. This assumes
      // that declared/defined symbols are in the same compilation unit and can
      // be reached through PC-relative jumps.
      //
      // NOTE:
      // This may be an unsafe assumption for JIT and really large compilation
      // units.
      if (GV->isDeclaration()) {
        Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
      } else {
        Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
      }
    } else {
      // "Large memory" mode: Turn all calls into indirect calls with a X-form
      // address.
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    MVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
                                                 Callee.getValueType());

    if (!ST->usingLargeMem()) {
      Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
    } else {
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
    }
  } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDValue(Dest, 0);
  }

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);
  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
                      &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  if (TheCall->getValueType(0) != MVT::Other)
    InFlag = Chain.getValue(1);

  SDValue ResultVals[3];
  unsigned NumResults = 0;

  // If the call has results, copy the values out of the ret val registers.
  switch (TheCall->getValueType(0).getSimpleVT()) {
  default: assert(0 && "Unexpected ret value!");
  case MVT::Other: break;
  case MVT::i32:
    if (TheCall->getValueType(1) == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
                                 MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals[1] = Chain.getValue(0);
      NumResults = 2;
    } else {
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
                                 InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      NumResults = 1;
    }
    break;
  case MVT::i64:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i64,
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  case MVT::i128:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i128,
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  case MVT::f32:
  case MVT::f64:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, TheCall->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  case MVT::v2f64:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, TheCall->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  }

  // If the function returns void, just return the chain.
  if (NumResults == 0)
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  ResultVals[NumResults++] = Chain;
  SDValue Res = DAG.getMergeValues(ResultVals, NumResults, dl);
  return Res.getValue(Op.getResNo());
}

static SDValue
LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  DebugLoc dl = Op.getDebugLoc();
  CCState CCInfo(CC, isVarArg, TM, RVLocs);
  CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }

  SDValue Chain = Op.getOperand(0);
  SDValue Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                             Op.getOperand(i*2+1), Flag);
    Flag = Chain.getValue(1);
  }

  if (Flag.getNode())
    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
}

//===----------------------------------------------------------------------===//
// Vector related lowering:
//===----------------------------------------------------------------------===//

static ConstantSDNode *
getVecImm(SDNode *N) {
  SDValue OpVal(0, 0);

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.getNode() == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return 0;
  }

  if (OpVal.getNode() != 0) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      return CN;
    }
  }

  return 0;
}

/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);

      if (upper != lower)
        return SDValue();

      Value = Value >> 32;
    }
    if (Value <= 0x3ffff)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}

/// get_vec_i16imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);

      if (upper != lower)
        return SDValue();

      Value = Value >> 32;
    }
    if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
      return DAG.getTargetConstant(Value, ValueType);
    }
  }

  return SDValue();
}

/// get_vec_i10imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 10-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);

      if (upper != lower)
        return SDValue();

      Value = Value >> 32;
    }
    if (isS10Constant(Value))
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}

/// get_vec_i8imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 8-bit constant, and if so, return the
/// constant.
///
/// @note: The incoming vector is v16i8 because that's the only way we can load
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same quantity.
SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
                           MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getZExtValue();
    if (ValueType == MVT::i16
        && Value <= 0xffff                 /* truncated from uint64_t */
        && ((short) Value >> 8) == ((short) Value & 0xff))
      return DAG.getTargetConstant(Value & 0xff, ValueType);
    else if (ValueType == MVT::i8
             && (Value & 0xff) == Value)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}
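
// Example (illustrative): a v8i16 splat of 0x2a2a passes the upper-byte ==
// lower-byte test above, so get_vec_i8imm returns the i8 constant 0x2a; a
// splat of 0x1234 fails the test and yields an empty SDValue.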

/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant
SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
                             MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if ((ValueType == MVT::i32
         && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
      return DAG.getTargetConstant(Value >> 16, ValueType);
  }

  return SDValue();
}

/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
  }

  return SDValue();
}

/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    // Use the full 64-bit value; truncating through unsigned would drop the
    // upper word of the splat constant.
    return DAG.getTargetConstant(CN->getZExtValue(), MVT::i64);
  }

  return SDValue();
}

//! Lower a BUILD_VECTOR instruction creatively:
static SDValue
LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  MVT EltVT = VT.getVectorElementType();
  DebugLoc dl = Op.getDebugLoc();
  BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
  assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
  unsigned minSplatBits = EltVT.getSizeInBits();

  if (minSplatBits < 16)
    minSplatBits = 16;

  APInt APSplatBits, APSplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;

  if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                            HasAnyUndefs, minSplatBits)
      || minSplatBits < SplatBitSize)
    return SDValue();   // Wasn't a constant vector or splat exceeded min

  uint64_t SplatBits = APSplatBits.getZExtValue();

  switch (VT.getSimpleVT()) {
  default:
    cerr << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = "
         << VT.getMVTString()
         << "\n";
    abort();
    /*NOTREACHED*/
  case MVT::v4f32: {
    uint32_t Value32 = uint32_t(SplatBits);
    assert(SplatBitSize == 32
           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(Value32, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
  }
  case MVT::v2f64: {
    uint64_t f64val = uint64_t(SplatBits);
    assert(SplatBitSize == 64
           && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(f64val, MVT::i64);
    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
  }
  case MVT::v16i8: {
    // 8-bit constants have to be expanded to 16-bits
    unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
    SmallVector<SDValue, 8> Ops;

    Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
    return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16,
                                   &Ops[0], Ops.size()));
  }
  case MVT::v8i16: {
    unsigned short Value16 = SplatBits;
    SDValue T = DAG.getConstant(Value16, EltVT);
    SmallVector<SDValue, 8> Ops;

    Ops.assign(8, T);
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
  }
  case MVT::v4i32: {
    SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
  }
  case MVT::v2i32: {
    SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T);
  }
  case MVT::v2i64: {
    return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
  }
  }

  return SDValue();
}

//! Lower a v2i64 splat to a SHUFB of replicated words or a constant pool load
SDValue
SPU::LowerV2I64Splat(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
                     DebugLoc dl) {
  uint32_t upper = uint32_t(SplatVal >> 32);
  uint32_t lower = uint32_t(SplatVal);

  if (upper == lower) {
    // Magic constant that can be matched by IL, ILA, et. al.
    SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                   Val, Val, Val, Val));
  } else {
    bool upper_special, lower_special;

    // NOTE: This code creates common-case shuffle masks that can be easily
    // detected as common expressions. It is not attempting to create highly
    // specialized masks to replace any and all 0's, 0xff's and 0x80's.

    // Detect if the upper or lower half is a special shuffle mask pattern:
    upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
    lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);

    // Both upper and lower are special, lower to a constant pool load:
    if (lower_special && upper_special) {
      SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
      return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
                         SplatValCN, SplatValCN);
    }

    SDValue LO32;
    SDValue HI32;
    SmallVector<SDValue, 16> ShufBytes;

    // Create lower vector if not a special pattern
    if (!lower_special) {
      SDValue LO32C = DAG.getConstant(lower, MVT::i32);
      LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
                         DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                     LO32C, LO32C, LO32C, LO32C));
    }

    // Create upper vector if not a special pattern
    if (!upper_special) {
      SDValue HI32C = DAG.getConstant(upper, MVT::i32);
      HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
                         DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                     HI32C, HI32C, HI32C, HI32C));
    }

    // If either upper or lower are special, then the two input operands are
    // the same (basically, one of them is a "don't care")
    if (lower_special)
      LO32 = HI32;
    if (upper_special)
      HI32 = LO32;

    for (int i = 0; i < 4; ++i) {
      uint64_t val = 0;

      for (int j = 0; j < 4; ++j) {
        bool process_upper, process_lower;

        val <<= 8;
        process_upper = (upper_special && (i & 1) == 0);
        process_lower = (lower_special && (i & 1) == 1);

        if (process_upper || process_lower) {
          if ((process_upper && upper == 0)
              || (process_lower && lower == 0))
            val |= 0x80;
          else if ((process_upper && upper == 0xffffffff)
                   || (process_lower && lower == 0xffffffff))
            val |= 0xc0;
          else if ((process_upper && upper == 0x80000000)
                   || (process_lower && lower == 0x80000000))
            val |= (j == 0 ? 0xe0 : 0x80);
        } else
          val |= i * 4 + j + ((i & 1) * 16);
      }

      ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
    }

    return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                   &ShufBytes[0], ShufBytes.size()));
  }
}
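
// Illustrative (hedged) walk-through of the mask construction above: for a
// splat of 0x0000000012345678, the upper word is the special pattern 0, so
// control words 0 and 2 of the shuffle emit 0x80 "fill with zero" bytes,
// while words 1 and 3 select bytes straight out of the LO32 splat vector;
// the v2i64 splat then costs one SHUFB instead of a constant pool load.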
1683 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1684 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1685 /// permutation vector, V3, is monotonically increasing with one "exception"
1686 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1687 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1688 /// In either case, the net result is going to eventually invoke SHUFB to
1689 /// permute/shuffle the bytes from V1 and V2.
1691 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate
1692 /// control word for byte/halfword/word insertion. This takes care of a single
1693 /// element move from V2 into V1.
1695 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
  const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  DebugLoc dl = Op.getDebugLoc();

  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;

  // If we have a single element being moved from V1 to V2, this can be handled
  // using the C*[DX] compute mask instructions, but the vector elements have
  // to be monotonically increasing with one exception element.
  MVT VecVT = V1.getValueType();
  MVT EltVT = VecVT.getVectorElementType();
  unsigned EltsFromV2 = 0;
  unsigned V2Elt = 0;
  unsigned V2EltIdx0 = 0;
  unsigned CurrElt = 0;
  unsigned MaxElts = VecVT.getVectorNumElements();
  unsigned PrevElt = 0;
  unsigned V0Elt = 0;
  bool monotonic = true;
  bool rotate = true;

  if (EltVT == MVT::i8) {
    V2EltIdx0 = 16;
  } else if (EltVT == MVT::i16) {
    V2EltIdx0 = 8;
  } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
    V2EltIdx0 = 4;
  } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
    V2EltIdx0 = 2;
  } else
    assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
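  // Single pass over the shuffle mask: decide whether it is monotonically
  // increasing with at most one element taken from V2 (lowered below via
  // SHUFFLE_MASK + SHUFB), or a pure left rotation of V1 (lowered via
  // ROTBYTES_LEFT). Anything else falls back to a full 16-byte SHUFB mask.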
  for (unsigned i = 0; i != MaxElts; ++i) {
    if (SVN->getMaskElt(i) < 0)
      continue;

    unsigned SrcElt = SVN->getMaskElt(i);

    if (monotonic) {
      if (SrcElt >= V2EltIdx0) {
        if (++EltsFromV2 <= 1) {
          V2Elt = (V2EltIdx0 - SrcElt) << 2;
        }
      } else if (CurrElt != SrcElt) {
        monotonic = false;
      }

      ++CurrElt;
    }

    if (rotate) {
      if (PrevElt > 0 && SrcElt < MaxElts) {
        if ((PrevElt == SrcElt - 1)
            || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
          PrevElt = SrcElt;
          if (SrcElt == 0)
            V0Elt = i;
        } else {
          rotate = false;
        }
      } else if (PrevElt == 0) {
        // First time through, need to keep track of the previous element
        PrevElt = SrcElt;
      } else {
        // This isn't a rotation; it takes elements from vector 2
        rotate = false;
      }
    }
  }
  if (EltsFromV2 == 1 && monotonic) {
    // Compute mask and shuffle
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
    MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    // Initialize temporary register to 0
    SDValue InitTempReg =
      DAG.getCopyToReg(DAG.getEntryNode(), dl, VReg, DAG.getConstant(0, PtrVT));
    // Copy register's contents as index in SHUFFLE_MASK:
    SDValue ShufMaskOp =
      DAG.getNode(SPUISD::SHUFFLE_MASK, dl, MVT::v4i32,
                  DAG.getTargetConstant(V2Elt, MVT::i32),
                  DAG.getCopyFromReg(InitTempReg, dl, VReg, PtrVT));
    // Use shuffle mask in SHUFB synthetic instruction:
    return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
                       ShufMaskOp);
  } else if (rotate) {
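    // The mask is a pure rotation of V1: source element 0 appears at result
    // position V0Elt, so the quadword must rotate left by the remaining
    // (MaxElts - V0Elt) elements, expressed here in bytes.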
    int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;

    return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
                       V1, DAG.getConstant(rotamt, MVT::i16));
  } else {
    // Convert the SHUFFLE_VECTOR mask's input element units to the
    // actual bytes.
    unsigned BytesPerElement = EltVT.getSizeInBits()/8;

    SmallVector<SDValue, 16> ResultMask;
    for (unsigned i = 0, e = MaxElts; i != e; ++i) {
      unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);

      for (unsigned j = 0; j < BytesPerElement; ++j)
        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
                                             MVT::i8));
    }

    SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
                                    &ResultMask[0], ResultMask.size());
    return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
  }
}
static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
  SDValue Op0 = Op.getOperand(0);                 // Op0 = the scalar
  DebugLoc dl = Op.getDebugLoc();

  if (Op0.getNode()->getOpcode() == ISD::Constant) {
    // For a constant, build the appropriate constant vector, which will
    // eventually simplify to a vector register load.

    ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
    SmallVector<SDValue, 16> ConstVecValues;
    MVT VT;
    size_t n_copies;

    // Create a constant vector:
    switch (Op.getValueType().getSimpleVT()) {
    default: assert(0 && "Unexpected constant value type in "
                         "LowerSCALAR_TO_VECTOR");
    case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
    case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
    case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
    case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
    case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
    case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
    }

    SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
    for (size_t j = 0; j < n_copies; ++j)
      ConstVecValues.push_back(CValue);

    return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
                       &ConstVecValues[0], ConstVecValues.size());
  } else {
    // Otherwise, copy the value from one register to another:
    switch (Op0.getValueType().getSimpleVT()) {
    default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    case MVT::f32:
    case MVT::f64:
      return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
    }
  }

  return SDValue();
}
static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  SDValue N = Op.getOperand(0);
  SDValue Elt = Op.getOperand(1);
  DebugLoc dl = Op.getDebugLoc();
  SDValue retval;

  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
    // Constant argument:
    int EltNo = (int) C->getZExtValue();

    // Sanity check the element index:
    if (VT == MVT::i8 && EltNo >= 16)
      assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
    else if (VT == MVT::i16 && EltNo >= 8)
      assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
    else if (VT == MVT::i32 && EltNo >= 4)
      assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
    else if (VT == MVT::i64 && EltNo >= 2)
      assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");

    if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
      // i32 and i64: Element 0 is the preferred slot
      return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
    }

    // Need to generate shuffle mask and extract:
    int prefslot_begin = -1, prefslot_end = -1;
    int elt_byte = EltNo * VT.getSizeInBits() / 8;
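    // Worked example: extracting i16 element 5 gives elt_byte = 10. With the
    // i16 preferred slot at bytes 2-3, the mask built below zero-fills bytes
    // 0-1 (control 0x80) and routes source bytes 10-11 into bytes 2-3.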
    switch (VT.getSimpleVT()) {
    default:
      assert(false && "Invalid value type!");
    case MVT::i8: {
      prefslot_begin = prefslot_end = 3;
      break;
    }
    case MVT::i16: {
      prefslot_begin = 2; prefslot_end = 3;
      break;
    }
    case MVT::i32:
    case MVT::f32: {
      prefslot_begin = 0; prefslot_end = 3;
      break;
    }
    case MVT::i64:
    case MVT::f64: {
      prefslot_begin = 0; prefslot_end = 7;
      break;
    }
    }

    assert(prefslot_begin != -1 && prefslot_end != -1 &&
           "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");

    unsigned int ShufBytes[16];
    for (int i = 0; i < 16; ++i) {
      // Zero-fill the upper part of the preferred slot; don't care about the
      // other slots:
      unsigned int mask_val;
      if (i <= prefslot_end) {
        mask_val =
          ((i < prefslot_begin)
            ? 0x80
            : elt_byte + (i - prefslot_begin));

        ShufBytes[i] = mask_val;
      } else
        ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
    }

    SDValue ShufMask[4];
    for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
      unsigned bidx = i * 4;
      unsigned int bits = ((ShufBytes[bidx] << 24) |
                           (ShufBytes[bidx+1] << 16) |
                           (ShufBytes[bidx+2] << 8) |
                           ShufBytes[bidx+3]);
      ShufMask[i] = DAG.getConstant(bits, MVT::i32);
    }

    SDValue ShufMaskVec =
      DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                  &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));

    retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
                         DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
                                     N, N, ShufMaskVec));
  } else {
    // Variable index: Rotate the requested element into slot 0, then
    // replicate slot 0 across the vector
    MVT VecVT = N.getValueType();
    if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
      cerr << "LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit vector type!\n";
      abort();
    }

    // Make life easier by making sure the index is zero-extended to i32
    if (Elt.getValueType() != MVT::i32)
      Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);

    // Scale the index to a byte shift quantity
    APInt scaleFactor =
      APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
    unsigned scaleShift = scaleFactor.logBase2();
    SDValue vecShift;

    if (scaleShift > 0) {
      // Scale the shift factor:
      Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
                        DAG.getConstant(scaleShift, MVT::i32));
    }

    vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt);

    // Replicate the bytes starting at byte 0 across the entire vector (for
    // consistency with the notion of a unified register set)
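    // Each i32 of the "replicate" mask packs four byte selectors. Having
    // shifted the requested element down to byte 0, the constants below copy
    // it into every slot: e.g. 0x00010203 repeats source bytes 0-3 in each
    // word, so every 32-bit lane receives the extracted element.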
    SDValue replicate;

    switch (VT.getSimpleVT()) {
    default:
      cerr << "LowerEXTRACT_VECTOR_ELT(variable): Unhandled vector type\n";
      abort();
      /*NOTREACHED*/
    case MVT::i8: {
      SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                              factor, factor, factor, factor);
      break;
    }
    case MVT::i16: {
      SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                              factor, factor, factor, factor);
      break;
    }
    case MVT::i32:
    case MVT::f32: {
      SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                              factor, factor, factor, factor);
      break;
    }
    case MVT::i64:
    case MVT::f64: {
      SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
      SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                              loFactor, hiFactor, loFactor, hiFactor);
      break;
    }
    }

    retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
                         DAG.getNode(SPUISD::SHUFB, dl, VecVT,
                                     vecShift, vecShift, replicate));
  }

  return retval;
}
static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
  SDValue VecOp = Op.getOperand(0);
  SDValue ValOp = Op.getOperand(1);
  SDValue IdxOp = Op.getOperand(2);
  DebugLoc dl = Op.getDebugLoc();
  MVT VT = Op.getValueType();

  ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
  assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");

  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  // Use $sp ($1) because it's always 16-byte aligned and it's available:
  SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                DAG.getRegister(SPU::R1, PtrVT),
                                DAG.getConstant(CN->getSExtValue(), PtrVT));
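  // SHUFFLE_MASK eventually selects as a C*D instruction (e.g. cwd), and the
  // insertion mask a C*D form generates depends only on the low four bits of
  // its address operand; $sp merely provides a well-aligned base address for
  // that computation.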
  SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer);

  SDValue result =
    DAG.getNode(SPUISD::SHUFB, dl, VT,
                DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
                VecOp,
                DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask));

  return result;
}
static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
                           const TargetLowering &TLI)
{
  SDValue N0 = Op.getOperand(0);      // Everything has at least one operand
  DebugLoc dl = Op.getDebugLoc();
  MVT ShiftVT = TLI.getShiftAmountTy();

  assert(Op.getValueType() == MVT::i8);
  switch (Opc) {
  default:
    assert(0 && "Unhandled i8 math operator");
    /*NOTREACHED*/
    break;

  case ISD::ADD: {
    // 8-bit addition: Promote the arguments up to 16 bits and truncate
    // the result:
    SDValue N1 = Op.getOperand(1);
    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
  }

  case ISD::SUB: {
    // 8-bit subtraction: Promote the arguments up to 16 bits and truncate
    // the result:
    SDValue N1 = Op.getOperand(1);
    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
  }

  case ISD::ROTR:
  case ISD::ROTL: {
    SDValue N1 = Op.getOperand(1);
    MVT N1VT = N1.getValueType();

    N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
    if (!N1VT.bitsEq(ShiftVT)) {
      unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
                       ? ISD::ZERO_EXTEND
                       : ISD::TRUNCATE;
      N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
    }

    // Replicate lower 8-bits into upper 8:
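    // Mirroring the low byte into the high byte makes a 16-bit rotate act
    // like an 8-bit rotate: bits shifted out of the low byte re-enter from
    // the adjacent copy, and the truncate below keeps only the low byte.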
    SDValue ExpandArg =
      DAG.getNode(ISD::OR, dl, MVT::i16, N0,
                  DAG.getNode(ISD::SHL, dl, MVT::i16,
                              N0, DAG.getConstant(8, MVT::i32)));

    // Truncate back down to i8
    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
  }

  case ISD::SRL:
  case ISD::SHL: {
    SDValue N1 = Op.getOperand(1);
    MVT N1VT = N1.getValueType();

    N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
    if (!N1VT.bitsEq(ShiftVT)) {
      unsigned N1Opc = ISD::ZERO_EXTEND;

      if (N1.getValueType().bitsGT(ShiftVT))
        N1Opc = ISD::TRUNCATE;

      N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
    }

    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
  }

  case ISD::SRA: {
    SDValue N1 = Op.getOperand(1);
    MVT N1VT = N1.getValueType();

    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
    if (!N1VT.bitsEq(ShiftVT)) {
      unsigned N1Opc = ISD::SIGN_EXTEND;

      if (N1VT.bitsGT(ShiftVT))
        N1Opc = ISD::TRUNCATE;
      N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
    }

    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
  }

  case ISD::MUL: {
    SDValue N1 = Op.getOperand(1);

    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
  }
  }

  return SDValue();
}
//! Lower byte immediate operations for v16i8 vectors:
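/*!
  When one operand of a v16i8 AND/OR/XOR is a uniform byte splat, the
  operation can select to the byte-immediate forms (ANDBI/ORBI/XORBI).
  Re-emitting the splat below as a BUILD_VECTOR of i8 target constants
  exposes exactly that pattern to the instruction selector.
 */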
static SDValue
LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
  SDValue ConstVec;
  SDValue Arg;
  MVT VT = Op.getValueType();
  DebugLoc dl = Op.getDebugLoc();

  ConstVec = Op.getOperand(0);
  Arg = Op.getOperand(1);
  if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
    if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
      ConstVec = ConstVec.getOperand(0);
    } else {
      ConstVec = Op.getOperand(1);
      Arg = Op.getOperand(0);
      if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
        ConstVec = ConstVec.getOperand(0);
      }
    }
  }

  if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
    BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
    assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");

    APInt APSplatBits, APSplatUndef;
    unsigned SplatBitSize;
    bool HasAnyUndefs;
    unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();

    if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                             HasAnyUndefs, minSplatBits)
        && minSplatBits <= SplatBitSize) {
      uint64_t SplatBits = APSplatBits.getZExtValue();
      SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);

      SmallVector<SDValue, 16> tcVec;
      tcVec.assign(16, tc);
      return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
                         DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
                                     &tcVec[0], tcVec.size()));
    }
  }

  // These operations (AND, OR, XOR) are legal; they just couldn't be custom
  // lowered. Return the operation unchanged, rather than a null SDValue.
  return Op;
}
//! Custom lowering for CTPOP (count population)
/*!
  Custom lowering code that counts the number of ones in the input
  operand. SPU has such an instruction, but it counts the number of
  ones per byte, which then have to be accumulated.
 */
static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
  DebugLoc dl = Op.getDebugLoc();
  switch (VT.getSimpleVT()) {
  default:
    assert(false && "Invalid value type!");
  case MVT::i8: {
    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i32);

    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);

    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
  }
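  // For wider types, CNTB still delivers independent per-byte counts, so the
  // cases below fold the bytes together with shift-and-add steps (one add
  // for i16, two for i32) and then mask off everything above the maximum
  // possible count.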
  case MVT::i16: {
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();

    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);

    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i16);
    // The two byte counts can sum to 16, so keep five bits:
    SDValue Mask0 = DAG.getConstant(0x1f, MVT::i16);
    SDValue Shift1 = DAG.getConstant(8, MVT::i32);

    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);

    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDValue CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);

    SDValue CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);

    SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);

    return DAG.getNode(ISD::AND, dl, MVT::i16,
                       DAG.getNode(ISD::ADD, dl, MVT::i16,
                                   DAG.getNode(ISD::SRL, dl, MVT::i16,
                                               Tmp1, Shift1),
                                   Tmp1),
                       Mask0);
  }
  case MVT::i32: {
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();

    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
    unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);

    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i32);
    SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
    SDValue Shift1 = DAG.getConstant(16, MVT::i32);
    SDValue Shift2 = DAG.getConstant(8, MVT::i32);

    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);

    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDValue CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);

    SDValue CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);

    SDValue Comp1 =
      DAG.getNode(ISD::SRL, dl, MVT::i32,
                  DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
                  Shift1);

    SDValue Sum1 =
      DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
                  DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));

    SDValue Sum1_rescopy =
      DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);

    SDValue Comp2 =
      DAG.getNode(ISD::SRL, dl, MVT::i32,
                  DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
                  Shift2);
    SDValue Sum2 =
      DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
                  DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));

    return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
  }

  case MVT::i64:
    break;
  }

  return SDValue();
}
//! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
/*!
  f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
  All conversions to i64 are expanded to a libcall.
 */
static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
                              SPUTargetLowering &TLI) {
  MVT OpVT = Op.getValueType();
  SDValue Op0 = Op.getOperand(0);
  MVT Op0VT = Op0.getValueType();

  if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
      || OpVT == MVT::i64) {
    // Convert f32 / f64 to i32 / i64 via libcall.
    RTLIB::Libcall LC =
      (Op.getOpcode() == ISD::FP_TO_SINT)
       ? RTLIB::getFPTOSINT(Op0VT, OpVT)
       : RTLIB::getFPTOUINT(Op0VT, OpVT);
    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-int conversion!");
    SDValue Dummy;
    return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
  }

  return Op;
}
//! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
/*!
  i32->f32 passes through unchanged, whereas i32->f64 is expanded to a
  libcall. All conversions from i64 are expanded to a libcall.
 */
static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
                              SPUTargetLowering &TLI) {
  MVT OpVT = Op.getValueType();
  SDValue Op0 = Op.getOperand(0);
  MVT Op0VT = Op0.getValueType();

  if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
      || Op0VT == MVT::i64) {
    // Convert i32, i64 to f64 via libcall:
    RTLIB::Libcall LC =
      (Op.getOpcode() == ISD::SINT_TO_FP)
       ? RTLIB::getSINTTOFP(Op0VT, OpVT)
       : RTLIB::getUINTTOFP(Op0VT, OpVT);
    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected int-to-fp conversion!");
    SDValue Dummy;
    return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
  }

  return Op;
}
//! Lower ISD::SETCC
/*!
  This handles MVT::f64 (double floating point) condition lowering.
 */
static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
                          const TargetLowering &TLI) {
  CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
  DebugLoc dl = Op.getDebugLoc();
  assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");

  SDValue lhs = Op.getOperand(0);
  SDValue rhs = Op.getOperand(1);
  MVT lhsVT = lhs.getValueType();
  assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::f64\n");

  MVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
  MVT IntVT(MVT::i64);

  // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
  // selected to a NOP:
  SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs);
  SDValue lhsHi32 =
    DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
                DAG.getNode(ISD::SRL, dl, IntVT,
                            i64lhs, DAG.getConstant(32, MVT::i32)));
  SDValue lhsHi32abs =
    DAG.getNode(ISD::AND, dl, MVT::i32,
                lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
  SDValue lhsLo32 =
    DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
  // SETO and SETUO only use the lhs operand:
  if (CC->get() == ISD::SETO) {
    // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
    // SETUO:
    APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
    return DAG.getNode(ISD::XOR, dl, ccResultVT,
                       DAG.getSetCC(dl, ccResultVT,
                                    lhs, DAG.getConstantFP(0.0, lhsVT),
                                    ISD::SETUO),
                       DAG.getConstant(ccResultAllOnes, ccResultVT));
  } else if (CC->get() == ISD::SETUO) {
    // Evaluates to true if Op0 is [SQ]NaN
    return DAG.getNode(ISD::AND, dl, ccResultVT,
                       DAG.getSetCC(dl, ccResultVT,
                                    lhsHi32abs,
                                    DAG.getConstant(0x7ff00000, MVT::i32),
                                    ISD::SETGE),
                       DAG.getSetCC(dl, ccResultVT,
                                    lhsLo32,
                                    DAG.getConstant(0, MVT::i32),
                                    ISD::SETGT));
  }

  SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs);
  SDValue rhsHi32 =
    DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
                DAG.getNode(ISD::SRL, dl, IntVT,
                            i64rhs, DAG.getConstant(32, MVT::i32)));

  // If a value is negative, subtract from the sign magnitude constant:
  SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);

  // Convert the sign-magnitude representation into 2's complement:
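  // IEEE-754 doubles order like sign-magnitude integers: non-negative values
  // compare correctly on their raw bits, while negative values compare in
  // reverse. Rewriting negative values as (0x8000000000000000 - bits) below
  // produces two's complement integers whose signed comparison matches the
  // floating-point ordering (NaNs excluded; they are handled separately).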
  SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
                                      lhsHi32, DAG.getConstant(31, MVT::i32));
  SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
  SDValue lhsSelect =
    DAG.getNode(ISD::SELECT, dl, IntVT,
                lhsSelectMask, lhsSignMag2TC, i64lhs);

  SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
                                      rhsHi32, DAG.getConstant(31, MVT::i32));
  SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
  SDValue rhsSelect =
    DAG.getNode(ISD::SELECT, dl, IntVT,
                rhsSelectMask, rhsSignMag2TC, i64rhs);

  unsigned compareOp;

  switch (CC->get()) {
  case ISD::SETOEQ:
  case ISD::SETUEQ:
    compareOp = ISD::SETEQ; break;
  case ISD::SETOGT:
  case ISD::SETUGT:
    compareOp = ISD::SETGT; break;
  case ISD::SETOGE:
  case ISD::SETUGE:
    compareOp = ISD::SETGE; break;
  case ISD::SETOLT:
  case ISD::SETULT:
    compareOp = ISD::SETLT; break;
  case ISD::SETOLE:
  case ISD::SETULE:
    compareOp = ISD::SETLE; break;
  case ISD::SETONE:
  case ISD::SETUNE:
    compareOp = ISD::SETNE; break;
  default:
    cerr << "CellSPU ISel Select: unimplemented f64 condition\n";
    abort();
    break;
  }

  SDValue result =
    DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
                 (ISD::CondCode) compareOp);

  if ((CC->get() & 0x8) == 0) {
    // Ordered comparison:
    SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
                                  lhs, DAG.getConstantFP(0.0, MVT::f64),
                                  ISD::SETO);
    SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
                                  rhs, DAG.getConstantFP(0.0, MVT::f64),
                                  ISD::SETO);
    SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);

    result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
  }

  return result;
}
//! Lower ISD::SELECT_CC
/*!
  ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
  SELB instruction.

  \note Need to revisit this in the future: if the code path through the true
  and false value computations is longer than the latency of a branch (6
  cycles), then it would be more advantageous to branch to a new basic block
  on the condition. However, this code does not make that assumption, given
  the simplistic uses so far.
 */
static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
                              const TargetLowering &TLI) {
  MVT VT = Op.getValueType();
  SDValue lhs = Op.getOperand(0);
  SDValue rhs = Op.getOperand(1);
  SDValue trueval = Op.getOperand(2);
  SDValue falseval = Op.getOperand(3);
  SDValue condition = Op.getOperand(4);
  DebugLoc dl = Op.getDebugLoc();

  // NOTE: SELB's arguments: $rA, $rB, $mask
  //
  // SELB selects bits from $rA where bits in $mask are 0, and bits from $rB
  // where bits in $mask are 1. The compare result has 1s where the condition
  // was true and 0s where the condition was false. Hence, the arguments to
  // SELB get reversed.

  // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
  // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
  // with another "cannot select select_cc" assert:

  SDValue compare = DAG.getNode(ISD::SETCC, dl,
                                TLI.getSetCCResultType(Op.getValueType()),
                                lhs, rhs, condition);
  return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
}
//! Custom lower ISD::TRUNCATE
static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
{
  // Type to truncate to
  MVT VT = Op.getValueType();
  MVT::SimpleValueType simpleVT = VT.getSimpleVT();
  MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
  DebugLoc dl = Op.getDebugLoc();

  // Type to truncate from
  SDValue Op0 = Op.getOperand(0);
  MVT Op0VT = Op0.getValueType();

  if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
    // Create shuffle mask, least significant doubleword of quadword
    unsigned maskHigh = 0x08090a0b;
    unsigned maskLow = 0x0c0d0e0f;
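    // Each mask word lists four byte selectors: 0x08090a0b/0x0c0d0e0f route
    // bytes 8-15 of the source quadword (the low doubleword of the i128)
    // into bytes 0-7 of the result, i.e., into the i64 preferred slot.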
    // Use a shuffle to perform the truncation
    SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                   DAG.getConstant(maskHigh, MVT::i32),
                                   DAG.getConstant(maskLow, MVT::i32),
                                   DAG.getConstant(maskHigh, MVT::i32),
                                   DAG.getConstant(maskLow, MVT::i32));

    SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
                                       Op0, Op0, shufMask);

    return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
  }

  return SDValue();             // Leave the truncate unmolested
}
//! Custom (target-specific) lowering entry point
/*!
  This is where LLVM's DAG selection process calls to do target-specific
  lowering of nodes.
 */
SDValue
SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
{
  unsigned Opc = (unsigned) Op.getOpcode();
  MVT VT = Op.getValueType();

  switch (Opc) {
  default: {
    cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
    cerr << "Op.getOpcode() = " << Opc << "\n";
    cerr << "*Op.getNode():\n";
    Op.getNode()->dump();
    abort();
  }
  case ISD::LOAD:
  case ISD::EXTLOAD:
  case ISD::SEXTLOAD:
  case ISD::ZEXTLOAD:
    return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::STORE:
    return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::ConstantPool:
    return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::JumpTable:
    return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::ConstantFP:
    return LowerConstantFP(Op, DAG);
  case ISD::FORMAL_ARGUMENTS:
    return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
  case ISD::CALL:
    return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::RET:
    return LowerRET(Op, DAG, getTargetMachine());

  // i8, i64 math ops:
  case ISD::ADD:
  case ISD::SUB:
  case ISD::ROTR:
  case ISD::ROTL:
  case ISD::SRL:
  case ISD::SHL:
  case ISD::SRA: {
    if (VT == MVT::i8)
      return LowerI8Math(Op, DAG, Opc, *this);
    break;
  }

  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
    return LowerFP_TO_INT(Op, DAG, *this);

  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
    return LowerINT_TO_FP(Op, DAG, *this);

  // Vector-related lowering.
  case ISD::BUILD_VECTOR:
    return LowerBUILD_VECTOR(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:
    return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return LowerINSERT_VECTOR_ELT(Op, DAG);

  // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
    return LowerByteImmed(Op, DAG);

  // Vector and i8 multiply:
  case ISD::MUL:
    if (VT == MVT::i8)
      return LowerI8Math(Op, DAG, Opc, *this);
    break;

  case ISD::CTPOP:
    return LowerCTPOP(Op, DAG);

  case ISD::SELECT_CC:
    return LowerSELECT_CC(Op, DAG, *this);

  case ISD::SETCC:
    return LowerSETCC(Op, DAG, *this);

  case ISD::TRUNCATE:
    return LowerTRUNCATE(Op, DAG);
  }

  return SDValue();
}
void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue> &Results,
                                           SelectionDAG &DAG)
{
#if 0
  unsigned Opc = (unsigned) N->getOpcode();
  MVT OpVT = N->getValueType(0);

  switch (Opc) {
  default: {
    cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
    cerr << "Op.getOpcode() = " << Opc << "\n";
    cerr << "*Op.getNode():\n";
    N->dump();
    abort();
    /*NOTREACHED*/
  }
  }
#endif

  /* Otherwise, return unchanged */
}
//===----------------------------------------------------------------------===//
// Target Optimization Hooks
//===----------------------------------------------------------------------===//

SDValue
SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
{
#if 0
  TargetMachine &TM = getTargetMachine();
#endif
  const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
  SelectionDAG &DAG = DCI.DAG;
  SDValue Op0 = N->getOperand(0);       // everything has at least one operand
  MVT NodeVT = N->getValueType(0);      // The node's value type
  MVT Op0VT = Op0.getValueType();       // The first operand's result
  SDValue Result;                       // Initially, empty result
  DebugLoc dl = N->getDebugLoc();

  switch (N->getOpcode()) {
  default: break;
  case ISD::ADD: {
    SDValue Op1 = N->getOperand(1);

    if (Op0.getOpcode() == SPUISD::IndirectAddr
        || Op1.getOpcode() == SPUISD::IndirectAddr) {
      // Normalize the operands to reduce repeated code
      SDValue IndirectArg = Op0, AddArg = Op1;

      if (Op1.getOpcode() == SPUISD::IndirectAddr) {
        IndirectArg = Op1;
        AddArg = Op0;
      }

      if (isa<ConstantSDNode>(AddArg)) {
        ConstantSDNode *CN0 = cast<ConstantSDNode>(AddArg);
        SDValue IndOp1 = IndirectArg.getOperand(1);

        if (CN0->isNullValue()) {
          // (add (SPUindirect <arg>, <arg>), 0) ->
          // (SPUindirect <arg>, <arg>)

#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
            cerr << "\n"
                 << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
                 << "With:    (SPUindirect <arg>, <arg>)\n";
          }
#endif

          return IndirectArg;
        } else if (isa<ConstantSDNode>(IndOp1)) {
          // (add (SPUindirect <arg>, <const>), <const>) ->
          // (SPUindirect <arg>, <const + const>)
          ConstantSDNode *CN1 = cast<ConstantSDNode>(IndOp1);
          int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
          SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);

#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
            cerr << "\n"
                 << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
                 << "), " << CN0->getSExtValue() << ")\n"
                 << "With:    (SPUindirect <arg>, "
                 << combinedConst << ")\n";
          }
#endif

          return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
                             IndirectArg, combinedValue);
        }
      }
    }
    break;
  }
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND: {
    if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
      // (any_extend (SPUextract_elt0 <arg>)) ->
      // (SPUextract_elt0 <arg>)
      // Types must match, however...
#if !defined(NDEBUG)
      if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
        cerr << "\nReplace: ";
        N->dump(&DAG);
        cerr << "\nWith:    ";
        Op0.getNode()->dump(&DAG);
        cerr << "\n";
      }
#endif

      return Op0;
    }
    break;
  }
  case SPUISD::IndirectAddr: {
    if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
      ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
      if (CN != 0 && CN->getZExtValue() == 0) {
        // (SPUindirect (SPUaform <addr>, 0), 0) ->
        // (SPUaform <addr>, 0)

        DEBUG(cerr << "Replace: ");
        DEBUG(N->dump(&DAG));
        DEBUG(cerr << "\nWith:    ");
        DEBUG(Op0.getNode()->dump(&DAG));
        DEBUG(cerr << "\n");

        return Op0;
      }
    } else if (Op0.getOpcode() == ISD::ADD) {
      SDValue Op1 = N->getOperand(1);
      if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
        // (SPUindirect (add <arg>, <arg>), 0) ->
        // (SPUindirect <arg>, <arg>)
        if (CN1->isNullValue()) {

#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
            cerr << "\n"
                 << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
                 << "With:    (SPUindirect <arg>, <arg>)\n";
          }
#endif

          return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
                             Op0.getOperand(0), Op0.getOperand(1));
        }
      }
    }
    break;
  }
  case SPUISD::SHLQUAD_L_BITS:
  case SPUISD::SHLQUAD_L_BYTES:
  case SPUISD::VEC_SHL:
  case SPUISD::VEC_SRL:
  case SPUISD::VEC_SRA:
  case SPUISD::ROTBYTES_LEFT: {
    SDValue Op1 = N->getOperand(1);

    // Kill degenerate vector shifts:
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
      if (CN->isNullValue()) {
        Result = Op0;
      }
    }
    break;
  }
  case SPUISD::PREFSLOT2VEC: {
    switch (Op0.getOpcode()) {
    default:
      break;
    case ISD::ANY_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::SIGN_EXTEND: {
      // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
      // <arg>
      // but only if the SPUprefslot2vec and <arg> types match.
      SDValue Op00 = Op0.getOperand(0);
      if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
        SDValue Op000 = Op00.getOperand(0);
        if (Op000.getValueType() == NodeVT) {
          Result = Op000;
        }
      }
      break;
    }
    case SPUISD::VEC2PREFSLOT: {
      // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
      // <arg>
      Result = Op0.getOperand(0);
      break;
    }
    }
    break;
  }
  }

  // Otherwise, return unchanged.
  if (Result.getNode()) {
    DEBUG(cerr << "\nReplace.SPU: ");
    DEBUG(N->dump(&DAG));
    DEBUG(cerr << "\nWith:        ");
    DEBUG(Result.getNode()->dump(&DAG));
    DEBUG(cerr << "\n");
  }

  return Result;
}
//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
SPUTargetLowering::ConstraintType
SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
  if (ConstraintLetter.size() == 1) {
    switch (ConstraintLetter[0]) {
    default: break;
    case 'b':
    case 'r':
    case 'f':
    case 'v':
    case 'y':
      return C_RegisterClass;
    }
  }
  return TargetLowering::getConstraintType(ConstraintLetter);
}
std::pair<unsigned, const TargetRegisterClass*>
SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                MVT VT) const
{
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {
    case 'b':   // R1-R31
    case 'r':   // R0-R31
      if (VT == MVT::i64)
        return std::make_pair(0U, SPU::R64CRegisterClass);
      return std::make_pair(0U, SPU::R32CRegisterClass);
    case 'f':
      if (VT == MVT::f32)
        return std::make_pair(0U, SPU::R32FPRegisterClass);
      else if (VT == MVT::f64)
        return std::make_pair(0U, SPU::R64FPRegisterClass);
      break;
    case 'v':
      return std::make_pair(0U, SPU::GPRCRegisterClass);
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}
//! Compute used/known bits for a SPU operand
void
SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
                                                  const APInt &Mask,
                                                  APInt &KnownZero,
                                                  APInt &KnownOne,
                                                  const SelectionDAG &DAG,
                                                  unsigned Depth) const {
#if 0
  const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;

  switch (Op.getOpcode()) {
  default:
    // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
    break;
  case SPUISD::CALL:
  case SPUISD::SHUFB:
  case SPUISD::SHUFFLE_MASK:
  case SPUISD::CNTB:
  case SPUISD::PREFSLOT2VEC:
  case SPUISD::LDRESULT:
  case SPUISD::VEC2PREFSLOT:
  case SPUISD::SHLQUAD_L_BITS:
  case SPUISD::SHLQUAD_L_BYTES:
  case SPUISD::VEC_SHL:
  case SPUISD::VEC_SRL:
  case SPUISD::VEC_SRA:
  case SPUISD::VEC_ROTL:
  case SPUISD::VEC_ROTR:
  case SPUISD::ROTBYTES_LEFT:
  case SPUISD::SELECT_MASK:
  case SPUISD::SELB:
    break;
  }
#endif
}
unsigned
SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
                                                   unsigned Depth) const {
  switch (Op.getOpcode()) {
  default:
    return 1;

  case ISD::SETCC: {
    MVT VT = Op.getValueType();

    if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
      VT = MVT::i32;
    }
    return VT.getSizeInBits();
  }
  }
}
// LowerAsmOperandForConstraint
void
SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                char ConstraintLetter,
                                                bool hasMemory,
                                                std::vector<SDValue> &Ops,
                                                SelectionDAG &DAG) const {
  // Default, for the time being, to the base class handler
  TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
                                               Ops, DAG);
}

/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode.
bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
                                                const Type *Ty) const {
  // SPU's addresses are 256K (18-bit signed range):
  return (V > -(1 << 18) && V < (1 << 18) - 1);
}

bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue *GV) const {
  return false;
}

bool
SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The SPU target isn't yet aware of offsets.
  return false;
}