1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the SPUTargetLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "SPURegisterNames.h"
15 #include "SPUISelLowering.h"
16 #include "SPUTargetMachine.h"
17 #include "SPUFrameInfo.h"
18 #include "llvm/ADT/APInt.h"
19 #include "llvm/ADT/VectorExtras.h"
20 #include "llvm/CallingConv.h"
21 #include "llvm/CodeGen/CallingConvLower.h"
22 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/MachineFunction.h"
24 #include "llvm/CodeGen/MachineInstrBuilder.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/CodeGen/SelectionDAG.h"
27 #include "llvm/Constants.h"
28 #include "llvm/Function.h"
29 #include "llvm/Intrinsics.h"
30 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/MathExtras.h"
32 #include "llvm/Target/TargetOptions.h"
38 // Used in getTargetNodeName() below
40 std::map<unsigned, const char *> node_names;
42 //! MVT mapping to useful data for Cell SPU
43 struct valtype_map_s {
45 const int prefslot_byte;
48 const valtype_map_s valtype_map[] = {
59 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
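//! Find the valtype_map entry for a given value type via a linear scan; returns
//! 0 when the type has no entry, which is reported as an error below.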
61 const valtype_map_s *getValueTypeMapEntry(MVT VT) {
62 const valtype_map_s *retval = 0;
64 for (size_t i = 0; i < n_valtype_map; ++i) {
65 if (valtype_map[i].valtype == VT) {
66 retval = valtype_map + i;
73 cerr << "getValueTypeMapEntry returns NULL for "
83 //! Expand a library call into an actual call DAG node
86 This code is taken from SelectionDAGLegalize, since it is not exposed as
87 part of the LLVM SelectionDAG API.
91 ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
92 bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) {
93 // The input chain to this libcall is the entry node of the function.
94 // Legalizing the call will automatically add the previous call to the
96 SDValue InChain = DAG.getEntryNode();
98 TargetLowering::ArgListTy Args;
99 TargetLowering::ArgListEntry Entry;
100 for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
101 MVT ArgVT = Op.getOperand(i).getValueType();
102 const Type *ArgTy = ArgVT.getTypeForMVT();
103 Entry.Node = Op.getOperand(i);
105 Entry.isSExt = isSigned;
106 Entry.isZExt = !isSigned;
107 Args.push_back(Entry);
109 SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
112 // Splice the libcall in wherever FindInputOutputChains tells us to.
113 const Type *RetTy = Op.getNode()->getValueType(0).getTypeForMVT();
114 std::pair<SDValue, SDValue> CallInfo =
115 TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
116 CallingConv::C, false, Callee, Args, DAG);
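// Only the call's result value is returned to the caller; the chain produced
// by LowerCallTo (CallInfo.second) is not propagated by this helper.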
118 return CallInfo.first;
122 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
123 : TargetLowering(TM),
126 // Fold away setcc operations if possible.
129 // Use _setjmp/_longjmp instead of setjmp/longjmp.
130 setUseUnderscoreSetJmp(true);
131 setUseUnderscoreLongJmp(true);
133 // Set RTLIB libcall names as used by SPU:
134 setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");
136 // Set up the SPU's register classes:
137 addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
138 addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
139 addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
140 addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
141 addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
142 addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
143 addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
145 // SPU has no sign or zero extended loads for i1, i8, i16:
146 setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
147 setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
148 setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
150 setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
151 setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
153 // SPU constant load actions are custom lowered:
154 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
155 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
157 // SPU's loads and stores have to be custom lowered:
158 for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
160 MVT VT = (MVT::SimpleValueType)sctype;
162 setOperationAction(ISD::LOAD, VT, Custom);
163 setOperationAction(ISD::STORE, VT, Custom);
164 setLoadExtAction(ISD::EXTLOAD, VT, Custom);
165 setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
166 setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
168 for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
169 MVT StoreVT = (MVT::SimpleValueType) stype;
170 setTruncStoreAction(VT, StoreVT, Expand);
174 for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
176 MVT VT = (MVT::SimpleValueType) sctype;
178 setOperationAction(ISD::LOAD, VT, Custom);
179 setOperationAction(ISD::STORE, VT, Custom);
181 for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
182 MVT StoreVT = (MVT::SimpleValueType) stype;
183 setTruncStoreAction(VT, StoreVT, Expand);
187 // Expand the jumptable branches
188 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
189 setOperationAction(ISD::BR_CC, MVT::Other, Expand);
191 // Custom lower SELECT_CC for most cases, but expand by default
192 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
193 setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
194 setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
195 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
196 setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
198 // SPU has no intrinsics for these particular operations:
199 setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
201 // SPU has no SREM/UREM instructions
202 setOperationAction(ISD::SREM, MVT::i32, Expand);
203 setOperationAction(ISD::UREM, MVT::i32, Expand);
204 setOperationAction(ISD::SREM, MVT::i64, Expand);
205 setOperationAction(ISD::UREM, MVT::i64, Expand);
207 // We don't support sin/cos/sqrt/fmod
208 setOperationAction(ISD::FSIN , MVT::f64, Expand);
209 setOperationAction(ISD::FCOS , MVT::f64, Expand);
210 setOperationAction(ISD::FREM , MVT::f64, Expand);
211 setOperationAction(ISD::FSIN , MVT::f32, Expand);
212 setOperationAction(ISD::FCOS , MVT::f32, Expand);
213 setOperationAction(ISD::FREM , MVT::f32, Expand);
215 // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
217 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
218 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
220 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
221 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
223 // SPU can do rotate right and left, so legalize it... but customize for i8
224 // because instructions don't exist.
226 // FIXME: Change from "expand" to appropriate type once ROTR is supported in
228 setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
229 setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
230 setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/);
232 setOperationAction(ISD::ROTL, MVT::i32, Legal);
233 setOperationAction(ISD::ROTL, MVT::i16, Legal);
234 setOperationAction(ISD::ROTL, MVT::i8, Custom);
236 // SPU has no native version of shift left/right for i8
237 setOperationAction(ISD::SHL, MVT::i8, Custom);
238 setOperationAction(ISD::SRL, MVT::i8, Custom);
239 setOperationAction(ISD::SRA, MVT::i8, Custom);
241 // Make these operations legal and handle them during instruction selection:
242 setOperationAction(ISD::SHL, MVT::i64, Legal);
243 setOperationAction(ISD::SRL, MVT::i64, Legal);
244 setOperationAction(ISD::SRA, MVT::i64, Legal);
246 // Custom lower i8, i32 and i64 multiplications
247 setOperationAction(ISD::MUL, MVT::i8, Custom);
248 setOperationAction(ISD::MUL, MVT::i32, Legal);
249 setOperationAction(ISD::MUL, MVT::i64, Legal);
251 // Need to custom handle (some) common i8, i64 math ops
252 setOperationAction(ISD::ADD, MVT::i8, Custom);
253 setOperationAction(ISD::ADD, MVT::i64, Legal);
254 setOperationAction(ISD::SUB, MVT::i8, Custom);
255 setOperationAction(ISD::SUB, MVT::i64, Legal);
257   // SPU does not have BSWAP. It does have i32 support for CTLZ.
258 // CTPOP has to be custom lowered.
259 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
260 setOperationAction(ISD::BSWAP, MVT::i64, Expand);
262 setOperationAction(ISD::CTPOP, MVT::i8, Custom);
263 setOperationAction(ISD::CTPOP, MVT::i16, Custom);
264 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
265 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
267 setOperationAction(ISD::CTTZ , MVT::i32, Expand);
268 setOperationAction(ISD::CTTZ , MVT::i64, Expand);
270 setOperationAction(ISD::CTLZ , MVT::i32, Legal);
272 // SPU has a version of select that implements (a&~c)|(b&c), just like
273 // select ought to work:
274 setOperationAction(ISD::SELECT, MVT::i8, Legal);
275 setOperationAction(ISD::SELECT, MVT::i16, Legal);
276 setOperationAction(ISD::SELECT, MVT::i32, Legal);
277 setOperationAction(ISD::SELECT, MVT::i64, Legal);
279 setOperationAction(ISD::SETCC, MVT::i8, Legal);
280 setOperationAction(ISD::SETCC, MVT::i16, Legal);
281 setOperationAction(ISD::SETCC, MVT::i32, Legal);
282 setOperationAction(ISD::SETCC, MVT::i64, Legal);
283 setOperationAction(ISD::SETCC, MVT::f64, Custom);
285 // Custom lower i128 -> i64 truncates
286 setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
288 // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
289 // to expand to a libcall, hence the custom lowering:
290 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
291 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
293 // FDIV on SPU requires custom lowering
294 setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall
296   // SPU has [U|S]INT_TO_FP for i32->f32, but not for i32->f64 or i64->f64:
297 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
298 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
299 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
300 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
301 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
302 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
303 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
304 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
306 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
307 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
308 setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
309 setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
311 // We cannot sextinreg(i1). Expand to shifts.
312 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
314 // Support label based line numbers.
315 setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
316 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
318 // We want to legalize GlobalAddress and ConstantPool nodes into the
319 // appropriate instructions to materialize the address.
320 for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
322 MVT VT = (MVT::SimpleValueType)sctype;
324 setOperationAction(ISD::GlobalAddress, VT, Custom);
325 setOperationAction(ISD::ConstantPool, VT, Custom);
326 setOperationAction(ISD::JumpTable, VT, Custom);
329 // RET must be custom lowered, to meet ABI requirements
330 setOperationAction(ISD::RET, MVT::Other, Custom);
332 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
333 setOperationAction(ISD::VASTART , MVT::Other, Custom);
335 // Use the default implementation.
336 setOperationAction(ISD::VAARG , MVT::Other, Expand);
337 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
338 setOperationAction(ISD::VAEND , MVT::Other, Expand);
339 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
340 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
341 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
342 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
344 // Cell SPU has instructions for converting between i64 and fp.
345 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
346 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
348 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
349 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
351 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
352 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
354 // First set operation action for all vector types to expand. Then we
355 // will selectively turn on ones that can be effectively codegen'd.
356 addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
357 addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
358 addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
359 addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
360 addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
361 addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
363 // "Odd size" vector classes that we're willing to support:
364 addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);
366 for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
367 i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
368 MVT VT = (MVT::SimpleValueType)i;
370 // add/sub are legal for all supported vector VT's.
371 setOperationAction(ISD::ADD, VT, Legal);
372 setOperationAction(ISD::SUB, VT, Legal);
373     // mul is also legal; it is handled during instruction selection.
374 setOperationAction(ISD::MUL, VT, Legal);
376 setOperationAction(ISD::AND, VT, Legal);
377 setOperationAction(ISD::OR, VT, Legal);
378 setOperationAction(ISD::XOR, VT, Legal);
379 setOperationAction(ISD::LOAD, VT, Legal);
380 setOperationAction(ISD::SELECT, VT, Legal);
381 setOperationAction(ISD::STORE, VT, Legal);
383 // These operations need to be expanded:
384 setOperationAction(ISD::SDIV, VT, Expand);
385 setOperationAction(ISD::SREM, VT, Expand);
386 setOperationAction(ISD::UDIV, VT, Expand);
387 setOperationAction(ISD::UREM, VT, Expand);
389 // Custom lower build_vector, constant pool spills, insert and
390 // extract vector elements:
391 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
392 setOperationAction(ISD::ConstantPool, VT, Custom);
393 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
394 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
395 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
396 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
399 setOperationAction(ISD::AND, MVT::v16i8, Custom);
400 setOperationAction(ISD::OR, MVT::v16i8, Custom);
401 setOperationAction(ISD::XOR, MVT::v16i8, Custom);
402 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
404 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
406 setShiftAmountType(MVT::i32);
407 setBooleanContents(ZeroOrNegativeOneBooleanContent);
409 setStackPointerRegisterToSaveRestore(SPU::R1);
411 // We have target-specific dag combine patterns for the following nodes:
412 setTargetDAGCombine(ISD::ADD);
413 setTargetDAGCombine(ISD::ZERO_EXTEND);
414 setTargetDAGCombine(ISD::SIGN_EXTEND);
415 setTargetDAGCombine(ISD::ANY_EXTEND);
417 computeRegisterProperties();
419 // Set pre-RA register scheduler default to BURR, which produces slightly
420 // better code than the default (could also be TDRR, but TargetLowering.h
421 // needs a mod to support that model):
422 setSchedulingPreference(SchedulingForRegPressure);
426 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
428 if (node_names.empty()) {
429 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
430 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
431 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
432 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
433 node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
434 node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
435 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
436 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
437 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
438 node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
439 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
440 node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
441 node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
442 node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
443 node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
444 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
445 node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
446 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
447 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
448 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
449 node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
450 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
451 "SPUISD::ROTBYTES_LEFT_BITS";
452 node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
453 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
454 node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
455 node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
456 node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
459 std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
461 return ((i != node_names.end()) ? i->second : 0);
464 //===----------------------------------------------------------------------===//
465 // Return the Cell SPU's SETCC result type
466 //===----------------------------------------------------------------------===//
468 MVT SPUTargetLowering::getSetCCResultType(MVT VT) const {
469   // i8, i16 and i32 are valid SETCC result types
470 return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ? VT : MVT::i32);
473 //===----------------------------------------------------------------------===//
474 // Calling convention code:
475 //===----------------------------------------------------------------------===//
477 #include "SPUGenCallingConv.inc"
479 //===----------------------------------------------------------------------===//
480 // LowerOperation implementation
481 //===----------------------------------------------------------------------===//
483 /// Custom lower loads for CellSPU
485 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
486 within a 16-byte block, we have to rotate to extract the requested element.
488 For extending loads, we also want to ensure that the following sequence is
489 emitted, e.g. for MVT::f32 extending load to MVT::f64:
493 %2 v16i8,ch = rotate %1
494   %3 v4f32,ch = bitconvert %2
495   %4 f32 = vec2prefslot %3
496 %5 f64 = fp_extend %4
500 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
501 LoadSDNode *LN = cast<LoadSDNode>(Op);
502 SDValue the_chain = LN->getChain();
503 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
504 MVT InVT = LN->getMemoryVT();
505 MVT OutVT = Op.getValueType();
506 ISD::LoadExtType ExtType = LN->getExtensionType();
507 unsigned alignment = LN->getAlignment();
508 const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
510 switch (LN->getAddressingMode()) {
511 case ISD::UNINDEXED: {
513 SDValue basePtr = LN->getBasePtr();
516 if (alignment == 16) {
519 // Special cases for a known aligned load to simplify the base pointer
520 // and the rotation amount:
521 if (basePtr.getOpcode() == ISD::ADD
522 && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
523 // Known offset into basePtr
524 int64_t offset = CN->getSExtValue();
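        // The rotate amount moves the addressed element from its offset within
        // the 16-byte block into the type's preferred slot.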
525 int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);
530 rotate = DAG.getConstant(rotamt, MVT::i16);
532 // Simplify the base pointer for this case:
533 basePtr = basePtr.getOperand(0);
534 if ((offset & ~0xf) > 0) {
535 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
537 DAG.getConstant((offset & ~0xf), PtrVT));
539 } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
540 || (basePtr.getOpcode() == SPUISD::IndirectAddr
541 && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
542 && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
543 // Plain aligned a-form address: rotate into preferred slot
544 // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
545 int64_t rotamt = -vtm->prefslot_byte;
548 rotate = DAG.getConstant(rotamt, MVT::i16);
550 // Offset the rotate amount by the basePtr and the preferred slot
552 int64_t rotamt = -vtm->prefslot_byte;
555 rotate = DAG.getNode(ISD::ADD, PtrVT,
557 DAG.getConstant(rotamt, PtrVT));
560 // Unaligned load: must be more pessimistic about addressing modes:
561 if (basePtr.getOpcode() == ISD::ADD) {
562 MachineFunction &MF = DAG.getMachineFunction();
563 MachineRegisterInfo &RegInfo = MF.getRegInfo();
564 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
567 SDValue Op0 = basePtr.getOperand(0);
568 SDValue Op1 = basePtr.getOperand(1);
570 if (isa<ConstantSDNode>(Op1)) {
571 // Convert the (add <ptr>, <const>) to an indirect address contained
572 // in a register. Note that this is done because we need to avoid
573 // creating a 0(reg) d-form address due to the SPU's block loads.
574 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
575 the_chain = DAG.getCopyToReg(the_chain, VReg, basePtr, Flag);
576 basePtr = DAG.getCopyFromReg(the_chain, VReg, PtrVT);
578 // Convert the (add <arg1>, <arg2>) to an indirect address, which
579 // will likely be lowered as a reg(reg) x-form address.
580 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
583 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
585 DAG.getConstant(0, PtrVT));
588 // Offset the rotate amount by the basePtr and the preferred slot
590 rotate = DAG.getNode(ISD::ADD, PtrVT,
592 DAG.getConstant(-vtm->prefslot_byte, PtrVT));
595 // Re-emit as a v16i8 vector load
596 result = DAG.getLoad(MVT::v16i8, the_chain, basePtr,
597 LN->getSrcValue(), LN->getSrcValueOffset(),
598 LN->isVolatile(), 16);
601 the_chain = result.getValue(1);
603 // Rotate into the preferred slot:
604 result = DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v16i8,
605 result.getValue(0), rotate);
607 // Convert the loaded v16i8 vector to the appropriate vector type
608 // specified by the operand:
609 MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
610 result = DAG.getNode(SPUISD::VEC2PREFSLOT, InVT,
611 DAG.getNode(ISD::BIT_CONVERT, vecVT, result));
613 // Handle extending loads by extending the scalar result:
614 if (ExtType == ISD::SEXTLOAD) {
615 result = DAG.getNode(ISD::SIGN_EXTEND, OutVT, result);
616 } else if (ExtType == ISD::ZEXTLOAD) {
617 result = DAG.getNode(ISD::ZERO_EXTEND, OutVT, result);
618 } else if (ExtType == ISD::EXTLOAD) {
619 unsigned NewOpc = ISD::ANY_EXTEND;
621 if (OutVT.isFloatingPoint())
622 NewOpc = ISD::FP_EXTEND;
624 result = DAG.getNode(NewOpc, OutVT, result);
627 SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
628 SDValue retops[2] = {
633 result = DAG.getNode(SPUISD::LDRESULT, retvts,
634 retops, sizeof(retops) / sizeof(retops[0]));
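    // LDRESULT carries both the loaded (and possibly extended) value and the
    // updated chain, preserving the memory dependence through this custom lowering.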
641 case ISD::LAST_INDEXED_MODE:
642 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
644 cerr << (unsigned) LN->getAddressingMode() << "\n";
652 /// Custom lower stores for CellSPU
654 All CellSPU stores are aligned to 16-byte boundaries, so for elements
655 within a 16-byte block, we have to generate a shuffle to insert the
656 requested element into its place, then store the resulting block.
659 LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
660 StoreSDNode *SN = cast<StoreSDNode>(Op);
661 SDValue Value = SN->getValue();
662 MVT VT = Value.getValueType();
663 MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
664 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
665 unsigned alignment = SN->getAlignment();
667 switch (SN->getAddressingMode()) {
668 case ISD::UNINDEXED: {
669 // The vector type we really want to load from the 16-byte chunk.
670 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
671 stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
673 SDValue alignLoadVec;
674 SDValue basePtr = SN->getBasePtr();
675 SDValue the_chain = SN->getChain();
676 SDValue insertEltOffs;
678 if (alignment == 16) {
681 // Special cases for a known aligned load to simplify the base pointer
682 // and insertion byte:
683 if (basePtr.getOpcode() == ISD::ADD
684 && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
685 // Known offset into basePtr
686 int64_t offset = CN->getSExtValue();
688 // Simplify the base pointer for this case:
689 basePtr = basePtr.getOperand(0);
690 insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
692 DAG.getConstant((offset & 0xf), PtrVT));
694 if ((offset & ~0xf) > 0) {
695 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
697 DAG.getConstant((offset & ~0xf), PtrVT));
700 // Otherwise, assume it's at byte 0 of basePtr
701 insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
703 DAG.getConstant(0, PtrVT));
706 // Unaligned load: must be more pessimistic about addressing modes:
707 if (basePtr.getOpcode() == ISD::ADD) {
708 MachineFunction &MF = DAG.getMachineFunction();
709 MachineRegisterInfo &RegInfo = MF.getRegInfo();
710 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
713 SDValue Op0 = basePtr.getOperand(0);
714 SDValue Op1 = basePtr.getOperand(1);
716 if (isa<ConstantSDNode>(Op1)) {
717 // Convert the (add <ptr>, <const>) to an indirect address contained
718 // in a register. Note that this is done because we need to avoid
719 // creating a 0(reg) d-form address due to the SPU's block loads.
720 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
721 the_chain = DAG.getCopyToReg(the_chain, VReg, basePtr, Flag);
722 basePtr = DAG.getCopyFromReg(the_chain, VReg, PtrVT);
724 // Convert the (add <arg1>, <arg2>) to an indirect address, which
725 // will likely be lowered as a reg(reg) x-form address.
726 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
729 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
731 DAG.getConstant(0, PtrVT));
734 // Insertion point is solely determined by basePtr's contents
735 insertEltOffs = DAG.getNode(ISD::ADD, PtrVT,
737 DAG.getConstant(0, PtrVT));
740 // Re-emit as a v16i8 vector load
741 alignLoadVec = DAG.getLoad(MVT::v16i8, the_chain, basePtr,
742 SN->getSrcValue(), SN->getSrcValueOffset(),
743 SN->isVolatile(), 16);
746 the_chain = alignLoadVec.getValue(1);
748 LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
749 SDValue theValue = SN->getValue();
753 && (theValue.getOpcode() == ISD::AssertZext
754 || theValue.getOpcode() == ISD::AssertSext)) {
755 // Drill down and get the value for zero- and sign-extended
757 theValue = theValue.getOperand(0);
760 // If the base pointer is already a D-form address, then just create
761     // a new D-form address with a slot offset and the original base pointer.
762 // Otherwise generate a D-form address with the slot offset relative
763 // to the stack pointer, which is always aligned.
765 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
766 cerr << "CellSPU LowerSTORE: basePtr = ";
767 basePtr.getNode()->dump(&DAG);
772 SDValue insertEltOp =
773 DAG.getNode(SPUISD::SHUFFLE_MASK, vecVT, insertEltOffs);
774 SDValue vectorizeOp =
775 DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue);
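    // SHUFFLE_MASK computes the insertion control word from the element offset,
    // and SHUFB merges the vectorized scalar into the previously loaded
    // 16-byte block.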
777 result = DAG.getNode(SPUISD::SHUFB, vecVT,
778 vectorizeOp, alignLoadVec,
779 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, insertEltOp));
781 result = DAG.getStore(the_chain, result, basePtr,
782 LN->getSrcValue(), LN->getSrcValueOffset(),
783 LN->isVolatile(), LN->getAlignment());
785 #if 0 && !defined(NDEBUG)
786 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
786     const SDValue &currentRoot = DAG.getRoot();
790 cerr << "------- CellSPU:LowerStore result:\n";
793 DAG.setRoot(currentRoot);
804 case ISD::LAST_INDEXED_MODE:
805     cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
807 cerr << (unsigned) SN->getAddressingMode() << "\n";
815 //! Generate the address of a constant pool entry.
817 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
818 MVT PtrVT = Op.getValueType();
819 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
820 Constant *C = CP->getConstVal();
821 SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
822 SDValue Zero = DAG.getConstant(0, PtrVT);
823 const TargetMachine &TM = DAG.getTarget();
825 if (TM.getRelocationModel() == Reloc::Static) {
826 if (!ST->usingLargeMem()) {
827 // Just return the SDValue with the constant pool address in it.
828 return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
830 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
831 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
832 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
837 "LowerConstantPool: Relocation model other than static"
842 //! Alternate entry point for generating the address of a constant pool entry
844 SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) {
845 return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
849 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
850 MVT PtrVT = Op.getValueType();
851 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
852 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
853 SDValue Zero = DAG.getConstant(0, PtrVT);
854 const TargetMachine &TM = DAG.getTarget();
856 if (TM.getRelocationModel() == Reloc::Static) {
857 if (!ST->usingLargeMem()) {
858 return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
860 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
861 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
862 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
867 "LowerJumpTable: Relocation model other than static not supported.");
872 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
873 MVT PtrVT = Op.getValueType();
874 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
875 GlobalValue *GV = GSDN->getGlobal();
876 SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
877 const TargetMachine &TM = DAG.getTarget();
878 SDValue Zero = DAG.getConstant(0, PtrVT);
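  // In small-memory mode a single A-form absolute address suffices; large-memory
  // code materializes the address as a Hi/Lo pair combined through IndirectAddr.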
880 if (TM.getRelocationModel() == Reloc::Static) {
881 if (!ST->usingLargeMem()) {
882 return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
884 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
885 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
886 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
889 cerr << "LowerGlobalAddress: Relocation model other than static not "
898 //! Custom lower double precision floating point constants
900 LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
901 MVT VT = Op.getValueType();
903 if (VT == MVT::f64) {
904 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
907 "LowerConstantFP: Node is not ConstantFPSDNode");
909 uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
910 SDValue T = DAG.getConstant(dbits, MVT::i64);
911 SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T);
912 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
913 DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, Tvec));
920 LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
922 MachineFunction &MF = DAG.getMachineFunction();
923 MachineFrameInfo *MFI = MF.getFrameInfo();
924 MachineRegisterInfo &RegInfo = MF.getRegInfo();
925 SmallVector<SDValue, 48> ArgValues;
926 SDValue Root = Op.getOperand(0);
927 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
929 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
930 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
932 unsigned ArgOffset = SPUFrameInfo::minStackSize();
933 unsigned ArgRegIdx = 0;
934 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
936 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
938 // Add DAG nodes to load the arguments or copy them out of registers.
939 for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
940 ArgNo != e; ++ArgNo) {
941 MVT ObjectVT = Op.getValue(ArgNo).getValueType();
942 unsigned ObjSize = ObjectVT.getSizeInBits()/8;
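    // Pass the argument in the next free argument register if one remains;
    // otherwise load it from its fixed stack slot below.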
945 if (ArgRegIdx < NumArgRegs) {
946 const TargetRegisterClass *ArgRegClass;
948 switch (ObjectVT.getSimpleVT()) {
950 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
951 << ObjectVT.getMVTString()
956 ArgRegClass = &SPU::R8CRegClass;
959 ArgRegClass = &SPU::R16CRegClass;
962 ArgRegClass = &SPU::R32CRegClass;
965 ArgRegClass = &SPU::R64CRegClass;
968 ArgRegClass = &SPU::GPRCRegClass;
971 ArgRegClass = &SPU::R32FPRegClass;
974 ArgRegClass = &SPU::R64FPRegClass;
982 ArgRegClass = &SPU::VECREGRegClass;
986 unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
987 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
988 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
991 // We need to load the argument to a virtual register if we determined
992 // above that we ran out of physical registers of the appropriate type
993 // or we're forced to do vararg
994 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
995 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
996 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
997 ArgOffset += StackSlotSize;
1000 ArgValues.push_back(ArgVal);
1002 Root = ArgVal.getOperand(0);
1007 // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
1008 // We will spill (79-3)+1 registers to the stack
1009 SmallVector<SDValue, 79-3+1> MemOps;
1011 // Create the frame slot
1013 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1014 VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
1015 SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1016 SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
1017 SDValue Store = DAG.getStore(Root, ArgVal, FIN, NULL, 0);
1018 Root = Store.getOperand(0);
1019 MemOps.push_back(Store);
1021 // Increment address by stack slot size for the next stored argument
1022 ArgOffset += StackSlotSize;
1024 if (!MemOps.empty())
1025 Root = DAG.getNode(ISD::TokenFactor,MVT::Other,&MemOps[0],MemOps.size());
1028 ArgValues.push_back(Root);
1030 // Return the new list of results.
1031 return DAG.getNode(ISD::MERGE_VALUES, Op.getNode()->getVTList(),
1032 &ArgValues[0], ArgValues.size());
1035 /// isLSAAddress - Return the immediate to use if the specified
1036 /// value is representable as a LSA address.
1037 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1038 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1041 int Addr = C->getZExtValue();
1042 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1043 (Addr << 14 >> 14) != Addr)
1044 return 0; // Top 14 bits have to be sext of immediate.
1046 return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
1050 LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1051 CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
1052 SDValue Chain = TheCall->getChain();
1053 SDValue Callee = TheCall->getCallee();
1054 unsigned NumOps = TheCall->getNumArgs();
1055 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1056 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1057 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1059 // Handy pointer type
1060 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1062 // Accumulate how many bytes are to be pushed on the stack, including the
1063 // linkage area, and parameter passing area. According to the SPU ABI,
1064 // we minimally need space for [LR] and [SP]
1065 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1067 // Set up a copy of the stack pointer for use loading and storing any
1068 // arguments that may not fit in the registers available for argument
1070 SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1072 // Figure out which arguments are going to go in registers, and which in
1074 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1075 unsigned ArgRegIdx = 0;
1077 // Keep track of registers passing arguments
1078 std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1079 // And the arguments passed on the stack
1080 SmallVector<SDValue, 8> MemOpChains;
1082 for (unsigned i = 0; i != NumOps; ++i) {
1083 SDValue Arg = TheCall->getArg(i);
1085 // PtrOff will be used to store the current argument to the stack if a
1086 // register cannot be found for it.
1087 SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1088 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1090 switch (Arg.getValueType().getSimpleVT()) {
1091 default: assert(0 && "Unexpected ValueType for argument!");
1097 if (ArgRegIdx != NumArgRegs) {
1098 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1100 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1101 ArgOffset += StackSlotSize;
1106 if (ArgRegIdx != NumArgRegs) {
1107 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1109 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1110 ArgOffset += StackSlotSize;
1119 if (ArgRegIdx != NumArgRegs) {
1120 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1122 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1123 ArgOffset += StackSlotSize;
1129 // Update number of stack bytes actually used, insert a call sequence start
1130 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1131 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
1134 if (!MemOpChains.empty()) {
1135 // Adjust the stack pointer for the stack arguments.
1136 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1137 &MemOpChains[0], MemOpChains.size());
1140 // Build a sequence of copy-to-reg nodes chained together with token chain
1141 // and flag operands which copy the outgoing args into the appropriate regs.
1143 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1144 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1146 InFlag = Chain.getValue(1);
1149 SmallVector<SDValue, 8> Ops;
1150 unsigned CallOpc = SPUISD::CALL;
1152 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1153 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1154 // node so that legalize doesn't hack it.
1155 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1156 GlobalValue *GV = G->getGlobal();
1157 MVT CalleeVT = Callee.getValueType();
1158 SDValue Zero = DAG.getConstant(0, PtrVT);
1159 SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1161 if (!ST->usingLargeMem()) {
1162 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1163 // style calls, otherwise, external symbols are BRASL calls. This assumes
1164 // that declared/defined symbols are in the same compilation unit and can
1165 // be reached through PC-relative jumps.
1168 // This may be an unsafe assumption for JIT and really large compilation
1170 if (GV->isDeclaration()) {
1171 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1173 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1176       // "Large memory" mode: Turn all calls into indirect calls with an X-form
1178 Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
1180 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1181 MVT CalleeVT = Callee.getValueType();
1182 SDValue Zero = DAG.getConstant(0, PtrVT);
1183 SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
1184 Callee.getValueType());
1186 if (!ST->usingLargeMem()) {
1187 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, ExtSym, Zero);
1189 Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, ExtSym, Zero);
1191 } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1192 // If this is an absolute destination address that appears to be a legal
1193 // local store address, use the munged value.
1194 Callee = SDValue(Dest, 0);
1197 Ops.push_back(Chain);
1198 Ops.push_back(Callee);
1200 // Add argument registers to the end of the list so that they are known live
1202 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1203 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1204 RegsToPass[i].second.getValueType()));
1206 if (InFlag.getNode())
1207 Ops.push_back(InFlag);
1208 // Returns a chain and a flag for retval copy to use.
1209 Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag),
1210 &Ops[0], Ops.size());
1211 InFlag = Chain.getValue(1);
1213 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
1214 DAG.getIntPtrConstant(0, true), InFlag);
1215 if (TheCall->getValueType(0) != MVT::Other)
1216 InFlag = Chain.getValue(1);
1218 SDValue ResultVals[3];
1219 unsigned NumResults = 0;
1221 // If the call has results, copy the values out of the ret val registers.
1222 switch (TheCall->getValueType(0).getSimpleVT()) {
1223 default: assert(0 && "Unexpected ret value!");
1224 case MVT::Other: break;
1226 if (TheCall->getValueType(1) == MVT::i32) {
1227 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1228 ResultVals[0] = Chain.getValue(0);
1229 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1230 Chain.getValue(2)).getValue(1);
1231 ResultVals[1] = Chain.getValue(0);
1234 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1235 ResultVals[0] = Chain.getValue(0);
1240 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1241 ResultVals[0] = Chain.getValue(0);
1245 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i128, InFlag).getValue(1);
1246 ResultVals[0] = Chain.getValue(0);
1251 Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1252 InFlag).getValue(1);
1253 ResultVals[0] = Chain.getValue(0);
1262 Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1263 InFlag).getValue(1);
1264 ResultVals[0] = Chain.getValue(0);
1269 // If the function returns void, just return the chain.
1270 if (NumResults == 0)
1273 // Otherwise, merge everything together with a MERGE_VALUES node.
1274 ResultVals[NumResults++] = Chain;
1275 SDValue Res = DAG.getMergeValues(ResultVals, NumResults);
1276 return Res.getValue(Op.getResNo());
1280 LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
1281 SmallVector<CCValAssign, 16> RVLocs;
1282 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1283 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1284 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1285 CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);
1287 // If this is the first return lowered for this function, add the regs to the
1288 // liveout set for the function.
1289 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1290 for (unsigned i = 0; i != RVLocs.size(); ++i)
1291 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1294 SDValue Chain = Op.getOperand(0);
1297 // Copy the result values into the output registers.
1298 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1299 CCValAssign &VA = RVLocs[i];
1300 assert(VA.isRegLoc() && "Can only return in registers!");
1301 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1302 Flag = Chain.getValue(1);
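    // The copies are glued together so they stay adjacent, and the final glue
    // is consumed by the RET_FLAG node below.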
1306 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1308 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1312 //===----------------------------------------------------------------------===//
1313 // Vector related lowering:
1314 //===----------------------------------------------------------------------===//
1316 static ConstantSDNode *
1317 getVecImm(SDNode *N) {
1318 SDValue OpVal(0, 0);
1320 // Check to see if this buildvec has a single non-undef value in its elements.
1321 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1322 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1323 if (OpVal.getNode() == 0)
1324 OpVal = N->getOperand(i);
1325 else if (OpVal != N->getOperand(i))
1329 if (OpVal.getNode() != 0) {
1330 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1335   return 0; // All UNDEF: use implicit def; not a Constant node
1338 /// get_vec_u18imm - Test if this vector is a vector filled with the same value
1339 /// and the value fits into an unsigned 18-bit constant, and if so, return the
1341 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1343 if (ConstantSDNode *CN = getVecImm(N)) {
1344 uint64_t Value = CN->getZExtValue();
1345 if (ValueType == MVT::i64) {
1346 uint64_t UValue = CN->getZExtValue();
1347 uint32_t upper = uint32_t(UValue >> 32);
1348 uint32_t lower = uint32_t(UValue);
1351 Value = Value >> 32;
1353 if (Value <= 0x3ffff)
1354 return DAG.getTargetConstant(Value, ValueType);
1360 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1361 /// and the value fits into a signed 16-bit constant, and if so, return the
1363 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1365 if (ConstantSDNode *CN = getVecImm(N)) {
1366 int64_t Value = CN->getSExtValue();
1367 if (ValueType == MVT::i64) {
1368 uint64_t UValue = CN->getZExtValue();
1369 uint32_t upper = uint32_t(UValue >> 32);
1370 uint32_t lower = uint32_t(UValue);
1373 Value = Value >> 32;
1375 if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1376 return DAG.getTargetConstant(Value, ValueType);
1383 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1384 /// and the value fits into a signed 10-bit constant, and if so, return the
1386 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1388 if (ConstantSDNode *CN = getVecImm(N)) {
1389 int64_t Value = CN->getSExtValue();
1390 if (ValueType == MVT::i64) {
1391 uint64_t UValue = CN->getZExtValue();
1392 uint32_t upper = uint32_t(UValue >> 32);
1393 uint32_t lower = uint32_t(UValue);
1396 Value = Value >> 32;
1398 if (isS10Constant(Value))
1399 return DAG.getTargetConstant(Value, ValueType);
1405 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1406 /// and the value fits into a signed 8-bit constant, and if so, return the
1409 /// @note: The incoming vector is v16i8 because that's the only way we can load
1410 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
1412 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1414 if (ConstantSDNode *CN = getVecImm(N)) {
1415 int Value = (int) CN->getZExtValue();
1416 if (ValueType == MVT::i16
1417 && Value <= 0xffff /* truncated from uint64_t */
1418 && ((short) Value >> 8) == ((short) Value & 0xff))
1419 return DAG.getTargetConstant(Value & 0xff, ValueType);
1420 else if (ValueType == MVT::i8
1421 && (Value & 0xff) == Value)
1422 return DAG.getTargetConstant(Value, ValueType);
1428 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1429 /// and the value fits into a signed 16-bit constant, and if so, return the
1431 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1433 if (ConstantSDNode *CN = getVecImm(N)) {
1434 uint64_t Value = CN->getZExtValue();
1435 if ((ValueType == MVT::i32
1436 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1437 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1438 return DAG.getTargetConstant(Value >> 16, ValueType);
1444 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1445 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1446 if (ConstantSDNode *CN = getVecImm(N)) {
1447 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
1453 /// get_v2i64_imm - Catch-all for general 64-bit constant vectors
1454 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1455 if (ConstantSDNode *CN = getVecImm(N)) {
1456     return DAG.getTargetConstant(CN->getZExtValue(), MVT::i64); // don't truncate the i64 value
1462 // If this is a vector of constants or undefs, get the bits. A bit in
1463 // UndefBits is set if the corresponding element of the vector is an
1464 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1465 // zero. Return true if this is not an array of constants, false if it is.
1467 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1468 uint64_t UndefBits[2]) {
1469 // Start with zero'd results.
1470 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1472 unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
1473 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1474 SDValue OpVal = BV->getOperand(i);
1476     unsigned PartNo = i >= e/2;     // In the upper 64 bits?
1477 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1479 uint64_t EltBits = 0;
1480 if (OpVal.getOpcode() == ISD::UNDEF) {
1481 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1482 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1484 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1485 EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
1486 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1487 const APFloat &apf = CN->getValueAPF();
1488 EltBits = (CN->getValueType(0) == MVT::f32
1489 ? FloatToBits(apf.convertToFloat())
1490 : DoubleToBits(apf.convertToDouble()));
1492 // Nonconstant element.
1496 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1499 //printf("%llx %llx %llx %llx\n",
1500 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1504 /// If this is a splat (repetition) of a value across the whole vector, return
1505 /// the smallest size that splats it. For example, "0x01010101010101..." is a
1506 /// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1507 /// SplatSize = 1 byte.
1508 static bool isConstantSplat(const uint64_t Bits128[2],
1509 const uint64_t Undef128[2],
1511 uint64_t &SplatBits, uint64_t &SplatUndef,
1513 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1514 // the same as the lower 64-bits, ignoring undefs.
1515 uint64_t Bits64 = Bits128[0] | Bits128[1];
1516 uint64_t Undef64 = Undef128[0] & Undef128[1];
1517 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1518 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1519 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1520 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
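  // Work from the widest candidate down: require the two 64-bit halves to match
  // (modulo undefs), then successively test 32-, 16- and 8-bit splats until
  // MinSplatBits stops the descent.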
1522 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1523 if (MinSplatBits < 64) {
1525 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1527 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1528 if (MinSplatBits < 32) {
1530       // If the top 16-bits are different from the lower 16-bits, ignoring
1531 // undefs, we have an i32 splat.
1532 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1533 if (MinSplatBits < 16) {
1534           // If the top 8-bits are different from the lower 8-bits, ignoring
1535 // undefs, we have an i16 splat.
1536 if ((Bits16 & (uint16_t(~Undef16) >> 8))
1537 == ((Bits16 >> 8) & ~Undef16)) {
1538 // Otherwise, we have an 8-bit splat.
1539 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1540 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
1546 SplatUndef = Undef16;
1553 SplatUndef = Undef32;
1559 SplatBits = Bits128[0];
1560 SplatUndef = Undef128[0];
1566 return false; // Can't be a splat if two pieces don't match.
1569 //! Lower a BUILD_VECTOR instruction creatively:
1571 LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1572 MVT VT = Op.getValueType();
1573 // If this is a vector of constants or undefs, get the bits. A bit in
1574 // UndefBits is set if the corresponding element of the vector is an
1575 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1577 uint64_t VectorBits[2];
1578 uint64_t UndefBits[2];
1579 uint64_t SplatBits, SplatUndef;
1581 if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
1582 || !isConstantSplat(VectorBits, UndefBits,
1583 VT.getVectorElementType().getSizeInBits(),
1584 SplatBits, SplatUndef, SplatSize))
1585 return SDValue(); // Not a constant vector, not a splat.
1587 switch (VT.getSimpleVT()) {
1589 cerr << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = "
1590 << VT.getMVTString()
1595 uint32_t Value32 = uint32_t(SplatBits);
1596 assert(SplatSize == 4
1597 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1598 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1599 SDValue T = DAG.getConstant(Value32, MVT::i32);
1600 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1601 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
1605 uint64_t f64val = uint64_t(SplatBits);
1606 assert(SplatSize == 8
1607 && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
1608 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1609 SDValue T = DAG.getConstant(f64val, MVT::i64);
1610 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1611 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1615 // 8-bit constants have to be expanded to 16-bits
1616 unsigned short Value16 = SplatBits | (SplatBits << 8);
1618 for (int i = 0; i < 8; ++i)
1619 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1620 return DAG.getNode(ISD::BIT_CONVERT, VT,
1621 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
1624 unsigned short Value16;
1626 Value16 = (unsigned short) (SplatBits & 0xffff);
1628 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1629 SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
1631 for (int i = 0; i < 8; ++i) Ops[i] = T;
1632 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1635 unsigned int Value = SplatBits;
1636 SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
1637 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
1640 unsigned int Value = SplatBits;
1641 SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
1642 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T);
1645 return SPU::LowerSplat_v2i64(VT, DAG, SplatBits);
1653 SPU::LowerSplat_v2i64(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal) {
1654 uint32_t upper = uint32_t(SplatVal >> 32);
1655 uint32_t lower = uint32_t(SplatVal);
1657 if (upper == lower) {
1658     // Magic constant that can be matched by IL, ILA, et al.
1659 SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
1660 return DAG.getNode(ISD::BIT_CONVERT, OpVT,
1661 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1662 Val, Val, Val, Val));
1666 SmallVector<SDValue, 16> ShufBytes;
1668 bool upper_special, lower_special;
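    // The two 32-bit halves differ: build a v4i32 splat for each half that is
    // not a special byte pattern, then merge the two with a byte shuffle (SHUFB).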
1670 // NOTE: This code creates common-case shuffle masks that can be easily
1671 // detected as common expressions. It is not attempting to create highly
1672 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1674 // Detect if the upper or lower half is a special shuffle mask pattern:
1675 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1676 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1678 // Create lower vector if not a special pattern
1679 if (!lower_special) {
1680 SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1681 LO32 = DAG.getNode(ISD::BIT_CONVERT, OpVT,
1682 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1683 LO32C, LO32C, LO32C, LO32C));
1686 // Create upper vector if not a special pattern
1687 if (!upper_special) {
1688 SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1689 HI32 = DAG.getNode(ISD::BIT_CONVERT, OpVT,
1690 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1691 HI32C, HI32C, HI32C, HI32C));
1694     // If either upper or lower is special, then the two input operands are
1695 // the same (basically, one of them is a "don't care")
1700 if (lower_special && upper_special) {
1701 // Unhappy situation... both upper and lower are special, so punt with
1702 // a target constant:
1703 SDValue Zero = DAG.getConstant(0, MVT::i32);
1704 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
1708 for (int i = 0; i < 4; ++i) {
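      // Build the 16 shuffle-control bytes word by word: even result words come
      // from the "upper" splat and odd words from the "lower" one. Words holding
      // a special constant (0, ~0, 0x80000000) are encoded with shufb's special
      // control bytes instead of selecting bytes from HI32/LO32.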
1710 for (int j = 0; j < 4; ++j) {
1712 bool process_upper, process_lower;
1714 process_upper = (upper_special && (i & 1) == 0);
1715 process_lower = (lower_special && (i & 1) == 1);
1717 if (process_upper || process_lower) {
1718 if ((process_upper && upper == 0)
1719 || (process_lower && lower == 0))
1721 else if ((process_upper && upper == 0xffffffff)
1722 || (process_lower && lower == 0xffffffff))
1724 else if ((process_upper && upper == 0x80000000)
1725 || (process_lower && lower == 0x80000000))
1726 val |= (j == 0 ? 0xe0 : 0x80);
1728 val |= i * 4 + j + ((i & 1) * 16);
1731 ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1734 return DAG.getNode(SPUISD::SHUFB, OpVT, HI32, LO32,
1735 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1736 &ShufBytes[0], ShufBytes.size()));
1740 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1741 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1742 /// permutation vector, V3, is monotonically increasing with one "exception"
1743 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1744 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1745 /// In either case, the net result is going to eventually invoke SHUFB to
1746 /// permute/shuffle the bytes from V1 and V2.
1748 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, which
1749 /// generate a control word for byte/halfword/word insertion. This takes care
1750 /// of a single element move from V2 into V1.
1752 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
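/// For example (v4i32): the mask (0, 1, 6, 3) is monotonic and takes exactly one
/// element (element 2 of V2), so it can use the SHUFFLE_MASK path, whereas an
/// arbitrary mask such as (3, 1, 0, 2) falls through to the general constant
/// shuffle-mask path.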
1753 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1754 SDValue V1 = Op.getOperand(0);
1755 SDValue V2 = Op.getOperand(1);
1756 SDValue PermMask = Op.getOperand(2);
1758 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1760 // If we have a single element being moved from V1 to V2, this can be handled
1761 // using the C*[DX] compute mask instructions, but the vector elements have
1762 // to be monotonically increasing with one exception element.
1763 MVT VecVT = V1.getValueType();
1764 MVT EltVT = VecVT.getVectorElementType();
1765 unsigned EltsFromV2 = 0;
1767 unsigned V2EltIdx0 = 0;
1768 unsigned CurrElt = 0;
1769 unsigned MaxElts = VecVT.getVectorNumElements();
1770 unsigned PrevElt = 0;
1772 bool monotonic = true;
1775 if (EltVT == MVT::i8) {
1776 V2EltIdx0 = 16;
1777 } else if (EltVT == MVT::i16) {
1778 V2EltIdx0 = 8;
1779 } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
1780 V2EltIdx0 = 4;
1781 } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
1782 V2EltIdx0 = 2;
1783 } else
1784 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1786 for (unsigned i = 0; i != PermMask.getNumOperands(); ++i) {
1787 if (PermMask.getOperand(i).getOpcode() != ISD::UNDEF) {
1788 unsigned SrcElt = cast<ConstantSDNode > (PermMask.getOperand(i))->getZExtValue();
1791 if (SrcElt >= V2EltIdx0) {
1792 if (1 >= (++EltsFromV2)) {
1793 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1795 } else if (CurrElt != SrcElt) {
1803 if (PrevElt > 0 && SrcElt < MaxElts) {
1804 if ((PrevElt == SrcElt - 1)
1805 || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
1812 } else if (PrevElt == 0) {
1813 // First time through, need to keep track of previous element
1816 // This isn't a rotation; it takes elements from vector 2
1823 if (EltsFromV2 == 1 && monotonic) {
1824 // Compute mask and shuffle
1825 MachineFunction &MF = DAG.getMachineFunction();
1826 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1827 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1828 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1829 // Initialize temporary register to 0
1830 SDValue InitTempReg =
1831 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1832 // Copy register's contents as index in SHUFFLE_MASK:
1833 SDValue ShufMaskOp =
1834 DAG.getNode(SPUISD::SHUFFLE_MASK, MVT::v4i32,
1835 DAG.getTargetConstant(V2Elt, MVT::i32),
1836 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1837 // Use shuffle mask in SHUFB synthetic instruction:
1838 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1839 } else if (rotate) {
1840 int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
1842 return DAG.getNode(SPUISD::ROTBYTES_LEFT, V1.getValueType(),
1843 V1, DAG.getConstant(rotamt, MVT::i16));
1845 // Convert the SHUFFLE_VECTOR mask's input element units to the
1846 // actual bytes.
1847 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1849 SmallVector<SDValue, 16> ResultMask;
1850 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1852 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1855 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1857 for (unsigned j = 0; j < BytesPerElement; ++j) {
1858 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1863 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1864 &ResultMask[0], ResultMask.size());
1865 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
1869 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1870 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1872 if (Op0.getNode()->getOpcode() == ISD::Constant) {
1873 // For a constant, build the appropriate constant vector, which will
1874 // eventually simplify to a vector register load.
1876 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1877 SmallVector<SDValue, 16> ConstVecValues;
1881 // Create a constant vector:
1882 switch (Op.getValueType().getSimpleVT()) {
1883 default: assert(0 && "Unexpected constant value type in "
1884 "LowerSCALAR_TO_VECTOR");
1885 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1886 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1887 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1888 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1889 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1890 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1893 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1894 for (size_t j = 0; j < n_copies; ++j)
1895 ConstVecValues.push_back(CValue);
1897 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1898 &ConstVecValues[0], ConstVecValues.size());
1900 // Otherwise, copy the value from one register to another:
1901 switch (Op0.getValueType().getSimpleVT()) {
1902 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1909 return DAG.getNode(SPUISD::PREFSLOT2VEC, Op.getValueType(), Op0, Op0);
1916 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
1917 MVT VT = Op.getValueType();
1918 SDValue N = Op.getOperand(0);
1919 SDValue Elt = Op.getOperand(1);
1922 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
1923 // Constant argument:
1924 int EltNo = (int) C->getZExtValue();
1927 if (VT == MVT::i8 && EltNo >= 16)
1928 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
1929 else if (VT == MVT::i16 && EltNo >= 8)
1930 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
1931 else if (VT == MVT::i32 && EltNo >= 4)
1932 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
1933 else if (VT == MVT::i64 && EltNo >= 2)
1934 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");
1936 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
1937 // i32 and i64: Element 0 is the preferred slot
1938 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, N);
1941 // Need to generate shuffle mask and extract:
1942 int prefslot_begin = -1, prefslot_end = -1;
1943 int elt_byte = EltNo * VT.getSizeInBits() / 8;
1945 switch (VT.getSimpleVT()) {
1947 assert(false && "Invalid value type!");
1949 prefslot_begin = prefslot_end = 3;
1953 prefslot_begin = 2; prefslot_end = 3;
1958 prefslot_begin = 0; prefslot_end = 3;
1963 prefslot_begin = 0; prefslot_end = 7;
1968 assert(prefslot_begin != -1 && prefslot_end != -1 &&
1969 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
1971 unsigned int ShufBytes[16];
1972 for (int i = 0; i < 16; ++i) {
1973 // zero fill upper part of preferred slot, don't care about the
1974 // rest of the slot:
1975 unsigned int mask_val;
1976 if (i <= prefslot_end) {
1977 mask_val =
1978 ((i < prefslot_begin)
1979 ? 0x80
1980 : elt_byte + (i - prefslot_begin));
1982 ShufBytes[i] = mask_val;
1984 ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
1987 SDValue ShufMask[4];
1988 for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
1989 unsigned bidx = i * 4;
1990 unsigned int bits = ((ShufBytes[bidx] << 24) |
1991 (ShufBytes[bidx+1] << 16) |
1992 (ShufBytes[bidx+2] << 8) |
1994 ShufMask[i] = DAG.getConstant(bits, MVT::i32);
1997 SDValue ShufMaskVec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1999 sizeof(ShufMask) / sizeof(ShufMask[0]));
2001 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2002 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2003 N, N, ShufMaskVec));
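// (For instance, extracting element 2 of a v4i32 vector gives elt_byte == 8, so
// every word of the mask becomes 0x08090a0b and the SHUFB rotates bytes 8..11
// into the preferred slot, bytes 0..3, before VEC2PREFSLOT reads the scalar out.)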
2005 // Variable index: Rotate the requested element into slot 0, then replicate
2006 // slot 0 across the vector
2007 MVT VecVT = N.getValueType();
2008 if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
2009 cerr << "LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit vector type!\n";
2013 // Make life easier by making sure the index is zero-extended to i32
2014 if (Elt.getValueType() != MVT::i32)
2015 Elt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Elt);
2017 // Scale the index to a bit/byte shift quantity
2019 APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
2020 unsigned scaleShift = scaleFactor.logBase2();
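// (e.g., for v4i32 the scale factor is 16/4 = 4, so the element index is shifted
// left by 2 to become a byte offset into the quadword; for v16i8 the factor is 1
// and no shift is needed.)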
2023 if (scaleShift > 0) {
2024 // Scale the shift factor:
2025 Elt = DAG.getNode(ISD::SHL, MVT::i32, Elt,
2026 DAG.getConstant(scaleShift, MVT::i32));
2029 vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT, N, Elt);
2031 // Replicate the bytes starting at byte 0 across the entire vector (for
2032 // consistency with the notion of a unified register set)
2035 switch (VT.getSimpleVT()) {
2037 cerr << "LowerEXTRACT_VECTOR_ELT(variable): Unhandled vector type\n";
2041 SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
2042 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2047 SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2048 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2054 SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2055 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2061 SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2062 SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2063 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, loFactor, hiFactor,
2064 loFactor, hiFactor);
2069 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2070 DAG.getNode(SPUISD::SHUFB, VecVT,
2071 vecShift, vecShift, replicate));
2077 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2078 SDValue VecOp = Op.getOperand(0);
2079 SDValue ValOp = Op.getOperand(1);
2080 SDValue IdxOp = Op.getOperand(2);
2081 MVT VT = Op.getValueType();
2083 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2084 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2086 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2087 // Use $sp ($1) because it's always 16-byte aligned and it's available:
2088 SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
2089 DAG.getRegister(SPU::R1, PtrVT),
2090 DAG.getConstant(CN->getSExtValue(), PtrVT));
2091 SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, VT, Pointer);
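// SHUFFLE_MASK of ($sp + index) should select to one of the c[bhwd]d instructions,
// which generate an insertion control word from the low bits of the effective
// address; feeding that control word to SHUFB below merges ValOp (in its
// preferred slot) into VecOp at the requested element position.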
2094 DAG.getNode(SPUISD::SHUFB, VT,
2095 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2097 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, ShufMask));
2102 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
2103 const TargetLowering &TLI)
2105 SDValue N0 = Op.getOperand(0); // Everything has at least one operand
2106 MVT ShiftVT = TLI.getShiftAmountTy();
2108 assert(Op.getValueType() == MVT::i8);
2111 assert(0 && "Unhandled i8 math operator");
2115 // 8-bit addition: Promote the arguments up to 16 bits and truncate
2117 SDValue N1 = Op.getOperand(1);
2118 N0 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0);
2119 N1 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1);
2120 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2121 DAG.getNode(Opc, MVT::i16, N0, N1));
2126 // 8-bit subtraction: Promote the arguments up to 16 bits and truncate
2128 SDValue N1 = Op.getOperand(1);
2129 N0 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0);
2130 N1 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1);
2131 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2132 DAG.getNode(Opc, MVT::i16, N0, N1));
2136 SDValue N1 = Op.getOperand(1);
2138 N0 = (N0.getOpcode() != ISD::Constant
2139 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2140 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2142 N1Opc = N1.getValueType().bitsLT(ShiftVT)
2145 N1 = (N1.getOpcode() != ISD::Constant
2146 ? DAG.getNode(N1Opc, ShiftVT, N1)
2147 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2148 TLI.getShiftAmountTy()));
2150 DAG.getNode(ISD::OR, MVT::i16, N0,
2151 DAG.getNode(ISD::SHL, MVT::i16,
2152 N0, DAG.getConstant(8, MVT::i32)));
2153 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2154 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
2158 SDValue N1 = Op.getOperand(1);
2160 N0 = (N0.getOpcode() != ISD::Constant
2161 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2162 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2164 N1Opc = N1.getValueType().bitsLT(ShiftVT)
2167 N1 = (N1.getOpcode() != ISD::Constant
2168 ? DAG.getNode(N1Opc, ShiftVT, N1)
2169 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(), ShiftVT));
2170 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2171 DAG.getNode(Opc, MVT::i16, N0, N1));
2174 SDValue N1 = Op.getOperand(1);
2176 N0 = (N0.getOpcode() != ISD::Constant
2177 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2178 : DAG.getConstant(cast<ConstantSDNode>(N0)->getSExtValue(),
2180 N1Opc = N1.getValueType().bitsLT(ShiftVT)
2183 N1 = (N1.getOpcode() != ISD::Constant
2184 ? DAG.getNode(N1Opc, ShiftVT, N1)
2185 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2187 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2188 DAG.getNode(Opc, MVT::i16, N0, N1));
2191 SDValue N1 = Op.getOperand(1);
2193 N0 = (N0.getOpcode() != ISD::Constant
2194 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2195 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2197 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2198 N1 = (N1.getOpcode() != ISD::Constant
2199 ? DAG.getNode(N1Opc, MVT::i16, N1)
2200 : DAG.getConstant(cast<ConstantSDNode>(N1)->getSExtValue(),
2202 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2203 DAG.getNode(Opc, MVT::i16, N0, N1));
2211 //! Generate the carry-generate shuffle mask.
2212 SDValue SPU::getCarryGenerateShufMask(SelectionDAG &DAG) {
2213 SmallVector<SDValue, 16 > ShufBytes;
2215 // Create the shuffle mask for "rotating" the carry up one register slot
2216 // once the carry is generated.
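// The 0x04050607 and 0x0c0d0e0f words move word 1 into word 0 and word 3 into
// word 2, so the carry computed in the low (odd) word of each doubleword ends up
// aligned with its high (even) word; the 0x80 control bytes make shufb produce
// zero bytes everywhere else.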
2217 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2218 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2219 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2220 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2222 return DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2223 &ShufBytes[0], ShufBytes.size());
2226 //! Generate the borrow-generate shuffle mask
2227 SDValue SPU::getBorrowGenerateShufMask(SelectionDAG &DAG) {
2228 SmallVector<SDValue, 16 > ShufBytes;
2230 // Create the shuffle mask for "rotating" the borrow up one register slot
2231 // once the borrow is generated.
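// Same layout as the carry mask above, except that the filler words use 0xc0
// control bytes, which shufb expands to 0xff rather than 0x00.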
2232 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2233 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2234 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2235 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2237 return DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2238 &ShufBytes[0], ShufBytes.size());
2241 //! Lower byte immediate operations for v16i8 vectors:
2243 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2246 MVT VT = Op.getValueType();
2248 ConstVec = Op.getOperand(0);
2249 Arg = Op.getOperand(1);
2250 if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2251 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2252 ConstVec = ConstVec.getOperand(0);
2254 ConstVec = Op.getOperand(1);
2255 Arg = Op.getOperand(0);
2256 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2257 ConstVec = ConstVec.getOperand(0);
2262 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2263 uint64_t VectorBits[2];
2264 uint64_t UndefBits[2];
2265 uint64_t SplatBits, SplatUndef;
2268 if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
2269 && isConstantSplat(VectorBits, UndefBits,
2270 VT.getVectorElementType().getSizeInBits(),
2271 SplatBits, SplatUndef, SplatSize)) {
2273 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2274 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2276 // Turn the BUILD_VECTOR into a set of target constants:
2277 for (size_t i = 0; i < tcVecSize; ++i)
2280 return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
2281 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2285 // These operations (AND, OR, XOR) are legal; they just couldn't be custom
2286 // lowered. Return the operation, rather than a null SDValue.
2290 //! Custom lowering for CTPOP (count population)
2292 Custom lowering code that counts the number of ones in the input
2293 operand. SPU has such an instruction (CNTB), but it counts the number of
2294 ones per byte, so the per-byte counts then have to be accumulated.
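For example, an i32 input of 0xF0F00F0F yields per-byte counts of 4, 4, 4, 4
from CNTB; the shift-and-add sequence in the i32 case below folds those into
the final count of 16.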
2296 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2297 MVT VT = Op.getValueType();
2298 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2300 switch (VT.getSimpleVT()) {
2302 assert(false && "Invalid value type!");
2304 SDValue N = Op.getOperand(0);
2305 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2307 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
2308 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2310 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
2314 MachineFunction &MF = DAG.getMachineFunction();
2315 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2317 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2319 SDValue N = Op.getOperand(0);
2320 SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2321 SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2322 SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2324 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
2325 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2327 // CNTB_result becomes the chain to which the virtual register
2328 // CNTB_reg becomes associated:
2329 SDValue CNTB_result =
2330 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2332 SDValue CNTB_rescopy =
2333 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2335 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2337 return DAG.getNode(ISD::AND, MVT::i16,
2338 DAG.getNode(ISD::ADD, MVT::i16,
2339 DAG.getNode(ISD::SRL, MVT::i16,
2346 MachineFunction &MF = DAG.getMachineFunction();
2347 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2349 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2350 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2352 SDValue N = Op.getOperand(0);
2353 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2354 SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2355 SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2356 SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2358 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
2359 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2361 // CNTB_result becomes the chain to which all of the virtual registers
2362 // CNTB_reg, SUM1_reg become associated:
2363 SDValue CNTB_result =
2364 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2366 SDValue CNTB_rescopy =
2367 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2370 DAG.getNode(ISD::SRL, MVT::i32,
2371 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2374 DAG.getNode(ISD::ADD, MVT::i32,
2375 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2377 SDValue Sum1_rescopy =
2378 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2381 DAG.getNode(ISD::SRL, MVT::i32,
2382 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2385 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2386 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2388 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2398 //! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
2400 f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
2401 All conversions to i64 are expanded to a libcall.
2403 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
2404 SPUTargetLowering &TLI) {
2405 MVT OpVT = Op.getValueType();
2406 SDValue Op0 = Op.getOperand(0);
2407 MVT Op0VT = Op0.getValueType();
2409 if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
2410 || OpVT == MVT::i64) {
2411 // Convert f64 to i32, or any FP type to i64, via libcall:
2413 (Op.getOpcode() == ISD::FP_TO_SINT)
2414 ? RTLIB::getFPTOSINT(Op0VT, OpVT)
2415 : RTLIB::getFPTOUINT(Op0VT, OpVT);
2416 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-int conversion!");
2418 return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2421 return Op; // return unmolested, legalized op
2424 //! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
2426 i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
2427 All conversions from i64 are expanded to a libcall.
2429 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
2430 SPUTargetLowering &TLI) {
2431 MVT OpVT = Op.getValueType();
2432 SDValue Op0 = Op.getOperand(0);
2433 MVT Op0VT = Op0.getValueType();
2435 if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
2436 || Op0VT == MVT::i64) {
2437 // Convert i32 to f64, or i64 to any FP type, via libcall:
2439 (Op.getOpcode() == ISD::SINT_TO_FP)
2440 ? RTLIB::getSINTTOFP(Op0VT, OpVT)
2441 : RTLIB::getUINTTOFP(Op0VT, OpVT);
2442 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected int-to-fp conversion!");
2444 return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2447 return Op; // return unmolested, legalized
2450 //! Lower ISD::SETCC
2452 This handles MVT::f64 (double floating point) condition lowering
2454 static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
2455 const TargetLowering &TLI) {
2456 CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
2457 assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
2459 SDValue lhs = Op.getOperand(0);
2460 SDValue rhs = Op.getOperand(1);
2461 MVT lhsVT = lhs.getValueType();
2462 assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::f64\n");
2464 MVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
2465 APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2466 MVT IntVT(MVT::i64);
2468 // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
2469 // selected to a NOP:
2470 SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, IntVT, lhs);
2472 DAG.getNode(ISD::TRUNCATE, MVT::i32,
2473 DAG.getNode(ISD::SRL, IntVT,
2474 i64lhs, DAG.getConstant(32, MVT::i32)));
2475 SDValue lhsHi32abs =
2476 DAG.getNode(ISD::AND, MVT::i32,
2477 lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
2479 DAG.getNode(ISD::TRUNCATE, MVT::i32, i64lhs);
2481 // SETO and SETUO only use the lhs operand:
2482 if (CC->get() == ISD::SETO) {
2483 // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
2484 // SETUO:
2485 APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2486 return DAG.getNode(ISD::XOR, ccResultVT,
2487 DAG.getSetCC(ccResultVT,
2488 lhs, DAG.getConstantFP(0.0, lhsVT),
2490 DAG.getConstant(ccResultAllOnes, ccResultVT));
2491 } else if (CC->get() == ISD::SETUO) {
2492 // Evaluates to true if Op0 is [SQ]NaN
2493 return DAG.getNode(ISD::AND, ccResultVT,
2494 DAG.getSetCC(ccResultVT,
2496 DAG.getConstant(0x7ff00000, MVT::i32),
2498 DAG.getSetCC(ccResultVT,
2500 DAG.getConstant(0, MVT::i32),
2504 SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, IntVT, rhs);
2506 DAG.getNode(ISD::TRUNCATE, MVT::i32,
2507 DAG.getNode(ISD::SRL, IntVT,
2508 i64rhs, DAG.getConstant(32, MVT::i32)));
2510 // If a value is negative, subtract from the sign magnitude constant:
2511 SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
2513 // Convert the sign-magnitude representation into 2's complement:
2514 SDValue lhsSelectMask = DAG.getNode(ISD::SRA, ccResultVT,
2515 lhsHi32, DAG.getConstant(31, MVT::i32));
2516 SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, IntVT, signMag2TC, i64lhs);
2518 DAG.getNode(ISD::SELECT, IntVT,
2519 lhsSelectMask, lhsSignMag2TC, i64lhs);
2521 SDValue rhsSelectMask = DAG.getNode(ISD::SRA, ccResultVT,
2522 rhsHi32, DAG.getConstant(31, MVT::i32));
2523 SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, IntVT, signMag2TC, i64rhs);
2525 DAG.getNode(ISD::SELECT, IntVT,
2526 rhsSelectMask, rhsSignMag2TC, i64rhs);
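// lhsSelect/rhsSelect now hold the doubles reinterpreted as two's complement
// integers: IEEE doubles order like sign-magnitude integers, and negative values
// were remapped by subtracting from 0x8000000000000000. A plain signed integer
// setcc therefore yields the right ordering for non-NaN inputs; the ordered
// predicates additionally AND in the "both operands ordered" test below.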
2530 switch (CC->get()) {
2533 compareOp = ISD::SETEQ; break;
2536 compareOp = ISD::SETGT; break;
2539 compareOp = ISD::SETGE; break;
2542 compareOp = ISD::SETLT; break;
2545 compareOp = ISD::SETLE; break;
2548 compareOp = ISD::SETNE; break;
2550 cerr << "CellSPU ISel Select: unimplemented f64 condition\n";
2556 DAG.getSetCC(ccResultVT, lhsSelect, rhsSelect, (ISD::CondCode) compareOp);
2558 if ((CC->get() & 0x8) == 0) {
2559 // Ordered comparison:
2560 SDValue lhsNaN = DAG.getSetCC(ccResultVT,
2561 lhs, DAG.getConstantFP(0.0, MVT::f64),
2563 SDValue rhsNaN = DAG.getSetCC(ccResultVT,
2564 rhs, DAG.getConstantFP(0.0, MVT::f64),
2566 SDValue ordered = DAG.getNode(ISD::AND, ccResultVT, lhsNaN, rhsNaN);
2568 result = DAG.getNode(ISD::AND, ccResultVT, ordered, result);
2574 //! Lower ISD::SELECT_CC
2576 ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
2579 \note Need to revisit this in the future: if the code path through the true
2580 and false value computations is longer than the latency of a branch (6
2581 cycles), then it would be more advantageous to insert a new basic block and
2582 branch on the condition. However, this code does not make that
2583 assumption, given the simplistic uses seen so far.
2586 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
2587 const TargetLowering &TLI) {
2588 MVT VT = Op.getValueType();
2589 SDValue lhs = Op.getOperand(0);
2590 SDValue rhs = Op.getOperand(1);
2591 SDValue trueval = Op.getOperand(2);
2592 SDValue falseval = Op.getOperand(3);
2593 SDValue condition = Op.getOperand(4);
2595 // NOTE: SELB's arguments: $rA, $rB, $mask
2597 // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
2598 // where bits in $mask are 1. CCond will be inverted, having 1s where the
2599 // condition was true and 0s where the condition was false. Hence, the
2600 // arguments to SELB get reversed.
2602 // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2603 // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2604 // with another "cannot select select_cc" assert:
2606 SDValue compare = DAG.getNode(ISD::SETCC,
2607 TLI.getSetCCResultType(Op.getValueType()),
2608 lhs, rhs, condition);
2609 return DAG.getNode(SPUISD::SELB, VT, falseval, trueval, compare);
2612 //! Custom lower ISD::TRUNCATE
2613 static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
2615 MVT VT = Op.getValueType();
2616 MVT::SimpleValueType simpleVT = VT.getSimpleVT();
2617 MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2619 SDValue Op0 = Op.getOperand(0);
2620 MVT Op0VT = Op0.getValueType();
2621 MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2623 if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
2624 // Create shuffle mask, least significant doubleword of quadword
2625 unsigned maskHigh = 0x08090a0b;
2626 unsigned maskLow = 0x0c0d0e0f;
2627 // Use a shuffle to perform the truncation
2628 SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2629 DAG.getConstant(maskHigh, MVT::i32),
2630 DAG.getConstant(maskLow, MVT::i32),
2631 DAG.getConstant(maskHigh, MVT::i32),
2632 DAG.getConstant(maskLow, MVT::i32));
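// (SPU registers are big-endian, so bytes 8..15 of the i128 hold the low 64 bits;
// the mask copies them into bytes 0..7, the i64 preferred slot. The duplicate
// copy selected into words 2..3 is a don't-care.)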
2635 SDValue PromoteScalar = DAG.getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0);
2637 SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT,
2638 PromoteScalar, PromoteScalar, shufMask);
2640 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2641 DAG.getNode(ISD::BIT_CONVERT, VecVT, truncShuffle));
2644 return SDValue(); // Leave the truncate unmolested
2647 //! Custom (target-specific) lowering entry point
2649 This is where LLVM's DAG selection process calls in to do target-specific
2650 lowering of operations that are marked for custom lowering.
2653 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2655 unsigned Opc = (unsigned) Op.getOpcode();
2656 MVT VT = Op.getValueType();
2660 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2661 cerr << "Op.getOpcode() = " << Opc << "\n";
2662 cerr << "*Op.getNode():\n";
2663 Op.getNode()->dump();
2670 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2672 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2673 case ISD::ConstantPool:
2674 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2675 case ISD::GlobalAddress:
2676 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2677 case ISD::JumpTable:
2678 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2679 case ISD::ConstantFP:
2680 return LowerConstantFP(Op, DAG);
2681 case ISD::FORMAL_ARGUMENTS:
2682 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2684 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2686 return LowerRET(Op, DAG, getTargetMachine());
2688 // i8, i64 math ops:
2697 return LowerI8Math(Op, DAG, Opc, *this);
2701 case ISD::FP_TO_SINT:
2702 case ISD::FP_TO_UINT:
2703 return LowerFP_TO_INT(Op, DAG, *this);
2705 case ISD::SINT_TO_FP:
2706 case ISD::UINT_TO_FP:
2707 return LowerINT_TO_FP(Op, DAG, *this);
2709 // Vector-related lowering.
2710 case ISD::BUILD_VECTOR:
2711 return LowerBUILD_VECTOR(Op, DAG);
2712 case ISD::SCALAR_TO_VECTOR:
2713 return LowerSCALAR_TO_VECTOR(Op, DAG);
2714 case ISD::VECTOR_SHUFFLE:
2715 return LowerVECTOR_SHUFFLE(Op, DAG);
2716 case ISD::EXTRACT_VECTOR_ELT:
2717 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2718 case ISD::INSERT_VECTOR_ELT:
2719 return LowerINSERT_VECTOR_ELT(Op, DAG);
2721 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2725 return LowerByteImmed(Op, DAG);
2727 // Vector and i8 multiply:
2730 return LowerI8Math(Op, DAG, Opc, *this);
2733 return LowerCTPOP(Op, DAG);
2735 case ISD::SELECT_CC:
2736 return LowerSELECT_CC(Op, DAG, *this);
2739 return LowerSETCC(Op, DAG, *this);
2742 return LowerTRUNCATE(Op, DAG);
2748 void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
2749 SmallVectorImpl<SDValue>&Results,
2753 unsigned Opc = (unsigned) N->getOpcode();
2754 MVT OpVT = N->getValueType(0);
2758 cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2759 cerr << "Op.getOpcode() = " << Opc << "\n";
2760 cerr << "*Op.getNode():\n";
2768 /* Otherwise, return unchanged */
2771 //===----------------------------------------------------------------------===//
2772 // Target Optimization Hooks
2773 //===----------------------------------------------------------------------===//
2776 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2779 TargetMachine &TM = getTargetMachine();
2781 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2782 SelectionDAG &DAG = DCI.DAG;
2783 SDValue Op0 = N->getOperand(0); // everything has at least one operand
2784 MVT NodeVT = N->getValueType(0); // The node's value type
2785 MVT Op0VT = Op0.getValueType(); // The first operand's result
2786 SDValue Result; // Initially, empty result
2788 switch (N->getOpcode()) {
2791 SDValue Op1 = N->getOperand(1);
2793 if (Op0.getOpcode() == SPUISD::IndirectAddr
2794 || Op1.getOpcode() == SPUISD::IndirectAddr) {
2795 // Normalize the operands to reduce repeated code
2796 SDValue IndirectArg = Op0, AddArg = Op1;
2798 if (Op1.getOpcode() == SPUISD::IndirectAddr) {
2803 if (isa<ConstantSDNode>(AddArg)) {
2804 ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg);
2805 SDValue IndOp1 = IndirectArg.getOperand(1);
2807 if (CN0->isNullValue()) {
2808 // (add (SPUindirect <arg>, <arg>), 0) ->
2809 // (SPUindirect <arg>, <arg>)
2811 #if !defined(NDEBUG)
2812 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2814 << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
2815 << "With: (SPUindirect <arg>, <arg>)\n";
2820 } else if (isa<ConstantSDNode>(IndOp1)) {
2821 // (add (SPUindirect <arg>, <const>), <const>) ->
2822 // (SPUindirect <arg>, <const + const>)
2823 ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1);
2824 int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
2825 SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
2827 #if !defined(NDEBUG)
2828 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2830 << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
2831 << "), " << CN0->getSExtValue() << ")\n"
2832 << "With: (SPUindirect <arg>, "
2833 << combinedConst << ")\n";
2837 return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
2838 IndirectArg, combinedValue);
2844 case ISD::SIGN_EXTEND:
2845 case ISD::ZERO_EXTEND:
2846 case ISD::ANY_EXTEND: {
2847 if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
2848 // (any_extend (SPUextract_elt0 <arg>)) ->
2849 // (SPUextract_elt0 <arg>)
2850 // Types must match, however...
2851 #if !defined(NDEBUG)
2852 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2853 cerr << "\nReplace: ";
2856 Op0.getNode()->dump(&DAG);
2865 case SPUISD::IndirectAddr: {
2866 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2867 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
2868 if (CN != 0 && CN->getZExtValue() == 0) {
2869 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2870 // (SPUaform <addr>, 0)
2872 DEBUG(cerr << "Replace: ");
2873 DEBUG(N->dump(&DAG));
2874 DEBUG(cerr << "\nWith: ");
2875 DEBUG(Op0.getNode()->dump(&DAG));
2876 DEBUG(cerr << "\n");
2880 } else if (Op0.getOpcode() == ISD::ADD) {
2881 SDValue Op1 = N->getOperand(1);
2882 if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
2883 // (SPUindirect (add <arg>, <arg>), 0) ->
2884 // (SPUindirect <arg>, <arg>)
2885 if (CN1->isNullValue()) {
2887 #if !defined(NDEBUG)
2888 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2890 << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
2891 << "With: (SPUindirect <arg>, <arg>)\n";
2895 return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
2896 Op0.getOperand(0), Op0.getOperand(1));
2902 case SPUISD::SHLQUAD_L_BITS:
2903 case SPUISD::SHLQUAD_L_BYTES:
2904 case SPUISD::VEC_SHL:
2905 case SPUISD::VEC_SRL:
2906 case SPUISD::VEC_SRA:
2907 case SPUISD::ROTBYTES_LEFT: {
2908 SDValue Op1 = N->getOperand(1);
2910 // Kill degenerate vector shifts:
2911 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
2912 if (CN->isNullValue()) {
2918 case SPUISD::PREFSLOT2VEC: {
2919 switch (Op0.getOpcode()) {
2922 case ISD::ANY_EXTEND:
2923 case ISD::ZERO_EXTEND:
2924 case ISD::SIGN_EXTEND: {
2925 // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
2927 // but only if the SPUprefslot2vec and <arg> types match.
2928 SDValue Op00 = Op0.getOperand(0);
2929 if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
2930 SDValue Op000 = Op00.getOperand(0);
2931 if (Op000.getValueType() == NodeVT) {
2937 case SPUISD::VEC2PREFSLOT: {
2938 // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
2940 Result = Op0.getOperand(0);
2948 // Otherwise, return unchanged.
2950 if (Result.getNode()) {
2951 DEBUG(cerr << "\nReplace.SPU: ");
2952 DEBUG(N->dump(&DAG));
2953 DEBUG(cerr << "\nWith: ");
2954 DEBUG(Result.getNode()->dump(&DAG));
2955 DEBUG(cerr << "\n");
2962 //===----------------------------------------------------------------------===//
2963 // Inline Assembly Support
2964 //===----------------------------------------------------------------------===//
2966 /// getConstraintType - Given a constraint letter, return the type of
2967 /// constraint it is for this target.
2968 SPUTargetLowering::ConstraintType
2969 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2970 if (ConstraintLetter.size() == 1) {
2971 switch (ConstraintLetter[0]) {
2978 return C_RegisterClass;
2981 return TargetLowering::getConstraintType(ConstraintLetter);
2984 std::pair<unsigned, const TargetRegisterClass*>
2985 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2988 if (Constraint.size() == 1) {
2989 // GCC RS6000 Constraint Letters
2990 switch (Constraint[0]) {
2994 return std::make_pair(0U, SPU::R64CRegisterClass);
2995 return std::make_pair(0U, SPU::R32CRegisterClass);
2998 return std::make_pair(0U, SPU::R32FPRegisterClass);
2999 else if (VT == MVT::f64)
3000 return std::make_pair(0U, SPU::R64FPRegisterClass);
3003 return std::make_pair(0U, SPU::GPRCRegisterClass);
3007 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
3010 //! Compute used/known bits for a SPU operand
3012 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
3016 const SelectionDAG &DAG,
3017 unsigned Depth ) const {
3019 const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
3021 switch (Op.getOpcode()) {
3023 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
3029 case SPUISD::PREFSLOT2VEC:
3030 case SPUISD::LDRESULT:
3031 case SPUISD::VEC2PREFSLOT:
3032 case SPUISD::SHLQUAD_L_BITS:
3033 case SPUISD::SHLQUAD_L_BYTES:
3034 case SPUISD::VEC_SHL:
3035 case SPUISD::VEC_SRL:
3036 case SPUISD::VEC_SRA:
3037 case SPUISD::VEC_ROTL:
3038 case SPUISD::VEC_ROTR:
3039 case SPUISD::ROTBYTES_LEFT:
3040 case SPUISD::SELECT_MASK:
3047 SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3048 unsigned Depth) const {
3049 switch (Op.getOpcode()) {
3054 MVT VT = Op.getValueType();
3056 if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
3059 return VT.getSizeInBits();
3064 // LowerAsmOperandForConstraint
3066 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3067 char ConstraintLetter,
3069 std::vector<SDValue> &Ops,
3070 SelectionDAG &DAG) const {
3071 // Default, for the time being, to the base class handler
3072 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
3076 /// isLegalAddressImmediate - Return true if the integer value can be used
3077 /// as the offset of the target addressing mode.
3078 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3079 const Type *Ty) const {
3080 // SPU's local store address space is only 256K, so any offset that fits within it is legal:
3081 return (V > -(1 << 18) && V < (1 << 18) - 1);
3084 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3089 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3090 // The SPU target isn't yet aware of offsets.