1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the SPUTargetLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "SPURegisterNames.h"
15 #include "SPUISelLowering.h"
16 #include "SPUTargetMachine.h"
17 #include "SPUFrameInfo.h"
18 #include "llvm/ADT/APInt.h"
19 #include "llvm/ADT/VectorExtras.h"
20 #include "llvm/CallingConv.h"
21 #include "llvm/CodeGen/CallingConvLower.h"
22 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/MachineFunction.h"
24 #include "llvm/CodeGen/MachineInstrBuilder.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/CodeGen/SelectionDAG.h"
27 #include "llvm/Constants.h"
28 #include "llvm/Function.h"
29 #include "llvm/Intrinsics.h"
30 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/MathExtras.h"
32 #include "llvm/Target/TargetOptions.h"
38 // Used in getTargetNodeName() below
// Maps SPUISD opcode values to printable names; lazily populated on the
// first call to getTargetNodeName() (see below).
40 std::map<unsigned, const char *> node_names;
42 //! MVT mapping to useful data for Cell SPU
43 struct valtype_map_s {
// Byte offset of this value type's "preferred slot" within a 16-byte
// quadword; used below to compute rotate amounts when extracting scalars
// from aligned 16-byte loads.
45 const int prefslot_byte;
// Table of per-MVT data (initializers elided in this listing):
48 const valtype_map_s valtype_map[] = {
// Number of entries in valtype_map.
59 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
// Linear-search valtype_map for the entry matching VT. Returns a pointer
// into valtype_map, or reports the miss on cerr (the visible code leaves
// retval at 0 when no entry matches — callers must expect NULL).
61 const valtype_map_s *getValueTypeMapEntry(MVT VT) {
62 const valtype_map_s *retval = 0;
64 for (size_t i = 0; i < n_valtype_map; ++i) {
65 if (valtype_map[i].valtype == VT) {
66 retval = valtype_map + i;
// Diagnostic for an unmapped value type (continuation lines elided here):
73 cerr << "getValueTypeMapEntry returns NULL for "
83 //! Expand a library call into an actual call DAG node
86 This code is taken from SelectionDAGLegalize, since it is not exposed as
87 part of the LLVM SelectionDAG API.
// Expand the operation Op into a runtime-library call to the RTLIB routine
// LC. Each operand of Op becomes a call argument, sign- or zero-extended
// according to isSigned; returns the call's result value (CallInfo.first).
// NOTE(review): the Hi out-parameter is never written in the visible body —
// confirm against the elided lines before relying on it.
91 ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
92 bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) {
93 // The input chain to this libcall is the entry node of the function.
94 // Legalizing the call will automatically add the previous call to the
96 SDValue InChain = DAG.getEntryNode();
98 TargetLowering::ArgListTy Args;
99 TargetLowering::ArgListEntry Entry;
// Build one ArgListEntry per operand of Op.
100 for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
101 MVT ArgVT = Op.getOperand(i).getValueType();
102 const Type *ArgTy = ArgVT.getTypeForMVT();
103 Entry.Node = Op.getOperand(i);
105 Entry.isSExt = isSigned;
106 Entry.isZExt = !isSigned;
107 Args.push_back(Entry);
// The callee is the external symbol named by the RTLIB table entry.
109 SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
112 // Splice the libcall in wherever FindInputOutputChains tells us to.
113 const Type *RetTy = Op.getNode()->getValueType(0).getTypeForMVT();
114 std::pair<SDValue, SDValue> CallInfo =
115 TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
116 CallingConv::C, false, Callee, Args, DAG);
// first = the call's return value; second (the chain) is discarded here.
118 return CallInfo.first;
// SPUTargetLowering constructor: registers the SPU register classes and
// declares, per (operation, type) pair, how each DAG node is legalized
// (Legal / Promote / Expand / Custom), plus libcall names, DAG-combine
// hooks, and scheduling preference.
122 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
123 : TargetLowering(TM),
126 // Fold away setcc operations if possible.
129 // Use _setjmp/_longjmp instead of setjmp/longjmp.
130 setUseUnderscoreSetJmp(true);
131 setUseUnderscoreLongJmp(true);
133 // Set RTLIB libcall names as used by SPU:
134 setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");
136 // Set up the SPU's register classes:
137 addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
138 addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
139 addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
140 addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
141 addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
142 addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
143 addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
145 // SPU has no sign or zero extended loads for i1, i8, i16:
146 setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
147 setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
148 setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
150 setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
151 setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
153 // SPU constant load actions are custom lowered:
154 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
155 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
157 // SPU's loads and stores have to be custom lowered:
// NOTE: the upper bound is exclusive, so i128 itself is not covered by
// this loop.
158 for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
160 MVT VT = (MVT::SimpleValueType)sctype;
162 setOperationAction(ISD::LOAD, VT, Custom);
163 setOperationAction(ISD::STORE, VT, Custom);
164 setLoadExtAction(ISD::EXTLOAD, VT, Custom);
165 setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
166 setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
// Expand every truncating store from VT down to each narrower int type.
168 for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
169 MVT StoreVT = (MVT::SimpleValueType) stype;
170 setTruncStoreAction(VT, StoreVT, Expand);
// Same for FP types; exclusive bound means only f32 iterates here.
174 for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
176 MVT VT = (MVT::SimpleValueType) sctype;
178 setOperationAction(ISD::LOAD, VT, Custom);
179 setOperationAction(ISD::STORE, VT, Custom);
181 for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
182 MVT StoreVT = (MVT::SimpleValueType) stype;
183 setTruncStoreAction(VT, StoreVT, Expand);
187 // Expand the jumptable branches
188 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
189 setOperationAction(ISD::BR_CC, MVT::Other, Expand);
191 // Custom lower SELECT_CC for most cases, but expand by default
192 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
193 setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
194 setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
195 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
196 setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
198 // SPU has no intrinsics for these particular operations:
199 setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
201 // SPU has no SREM/UREM instructions
202 setOperationAction(ISD::SREM, MVT::i32, Expand);
203 setOperationAction(ISD::UREM, MVT::i32, Expand);
204 setOperationAction(ISD::SREM, MVT::i64, Expand);
205 setOperationAction(ISD::UREM, MVT::i64, Expand);
207 // We don't support sin/cos/sqrt/fmod
208 setOperationAction(ISD::FSIN , MVT::f64, Expand);
209 setOperationAction(ISD::FCOS , MVT::f64, Expand);
210 setOperationAction(ISD::FREM , MVT::f64, Expand);
211 setOperationAction(ISD::FSIN , MVT::f32, Expand);
212 setOperationAction(ISD::FCOS , MVT::f32, Expand);
213 setOperationAction(ISD::FREM , MVT::f32, Expand);
215 // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
217 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
218 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
220 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
221 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
223 // SPU can do rotate right and left, so legalize it... but customize for i8
224 // because instructions don't exist.
226 // FIXME: Change from "expand" to appropriate type once ROTR is supported in
228 setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
229 setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
230 setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/);
232 setOperationAction(ISD::ROTL, MVT::i32, Legal);
233 setOperationAction(ISD::ROTL, MVT::i16, Legal);
234 setOperationAction(ISD::ROTL, MVT::i8, Custom);
236 // SPU has no native version of shift left/right for i8
237 setOperationAction(ISD::SHL, MVT::i8, Custom);
238 setOperationAction(ISD::SRL, MVT::i8, Custom);
239 setOperationAction(ISD::SRA, MVT::i8, Custom);
241 // Make these operations legal and handle them during instruction selection:
242 setOperationAction(ISD::SHL, MVT::i64, Legal);
243 setOperationAction(ISD::SRL, MVT::i64, Legal);
244 setOperationAction(ISD::SRA, MVT::i64, Legal);
246 // Custom lower i8, i32 and i64 multiplications
// NOTE(review): despite the comment above, only i8 is Custom here — i32
// and i64 MUL are marked Legal. Comment appears stale.
247 setOperationAction(ISD::MUL, MVT::i8, Custom);
248 setOperationAction(ISD::MUL, MVT::i32, Legal);
249 setOperationAction(ISD::MUL, MVT::i64, Legal);
251 // Need to custom handle (some) common i8, i64 math ops
252 setOperationAction(ISD::ADD, MVT::i8, Custom);
253 setOperationAction(ISD::ADD, MVT::i64, Legal);
254 setOperationAction(ISD::SUB, MVT::i8, Custom);
255 setOperationAction(ISD::SUB, MVT::i64, Legal);
257 // SPU does not have BSWAP. It does have i32 support CTLZ.
258 // CTPOP has to be custom lowered.
259 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
260 setOperationAction(ISD::BSWAP, MVT::i64, Expand);
262 setOperationAction(ISD::CTPOP, MVT::i8, Custom);
263 setOperationAction(ISD::CTPOP, MVT::i16, Custom);
264 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
265 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
267 setOperationAction(ISD::CTTZ , MVT::i32, Expand);
268 setOperationAction(ISD::CTTZ , MVT::i64, Expand);
270 setOperationAction(ISD::CTLZ , MVT::i32, Legal);
272 // SPU has a version of select that implements (a&~c)|(b&c), just like
273 // select ought to work:
274 setOperationAction(ISD::SELECT, MVT::i8, Legal);
275 setOperationAction(ISD::SELECT, MVT::i16, Legal);
276 setOperationAction(ISD::SELECT, MVT::i32, Legal);
277 setOperationAction(ISD::SELECT, MVT::i64, Legal);
279 setOperationAction(ISD::SETCC, MVT::i8, Legal);
280 setOperationAction(ISD::SETCC, MVT::i16, Legal);
281 setOperationAction(ISD::SETCC, MVT::i32, Legal);
282 setOperationAction(ISD::SETCC, MVT::i64, Legal);
283 setOperationAction(ISD::SETCC, MVT::f64, Custom);
285 // Custom lower i128 -> i64 truncates
286 setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
288 // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
289 // to expand to a libcall, hence the custom lowering:
// NOTE(review): FP_TO_UINT/i32 is set to Custom here but re-set to Promote
// further down (original line 349); presumably the later call wins — verify.
290 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
291 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
293 // FDIV on SPU requires custom lowering
294 setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall
296 // SPU has [U|S]INT_TO_FP
297 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
298 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
299 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
300 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
301 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
302 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
303 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
304 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
306 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
307 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
308 setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
309 setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
311 // We cannot sextinreg(i1). Expand to shifts.
312 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
314 // Support label based line numbers.
315 setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
316 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
318 // We want to legalize GlobalAddress and ConstantPool nodes into the
319 // appropriate instructions to materialize the address.
// NOTE: exclusive upper bound again — f128 itself is not included.
320 for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
322 MVT VT = (MVT::SimpleValueType)sctype;
324 setOperationAction(ISD::GlobalAddress, VT, Custom);
325 setOperationAction(ISD::ConstantPool, VT, Custom);
326 setOperationAction(ISD::JumpTable, VT, Custom);
329 // RET must be custom lowered, to meet ABI requirements
330 setOperationAction(ISD::RET, MVT::Other, Custom);
332 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
333 setOperationAction(ISD::VASTART , MVT::Other, Custom);
335 // Use the default implementation.
336 setOperationAction(ISD::VAARG , MVT::Other, Expand);
337 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
338 setOperationAction(ISD::VAEND , MVT::Other, Expand);
339 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
340 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
341 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
342 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
344 // Cell SPU has instructions for converting between i64 and fp.
// NOTE(review): these two repeat settings already made above (original
// lines 303/345 and 290-area) with the same Custom action — harmless but
// redundant.
345 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
346 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
348 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
349 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
351 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
352 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
354 // First set operation action for all vector types to expand. Then we
355 // will selectively turn on ones that can be effectively codegen'd.
356 addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
357 addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
358 addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
359 addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
360 addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
361 addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
363 // "Odd size" vector classes that we're willing to support:
364 addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);
366 for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
367 i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
368 MVT VT = (MVT::SimpleValueType)i;
370 // add/sub are legal for all supported vector VT's.
371 setOperationAction(ISD::ADD, VT, Legal);
372 setOperationAction(ISD::SUB, VT, Legal);
373 // mul has to be custom lowered.
// NOTE(review): comment above is stale — the action set here is Legal.
374 setOperationAction(ISD::MUL, VT, Legal);
376 setOperationAction(ISD::AND, VT, Legal);
377 setOperationAction(ISD::OR, VT, Legal);
378 setOperationAction(ISD::XOR, VT, Legal);
379 setOperationAction(ISD::LOAD, VT, Legal);
380 setOperationAction(ISD::SELECT, VT, Legal);
381 setOperationAction(ISD::STORE, VT, Legal);
383 // These operations need to be expanded:
384 setOperationAction(ISD::SDIV, VT, Expand);
385 setOperationAction(ISD::SREM, VT, Expand);
386 setOperationAction(ISD::UDIV, VT, Expand);
387 setOperationAction(ISD::UREM, VT, Expand);
389 // Custom lower build_vector, constant pool spills, insert and
390 // extract vector elements:
391 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
392 setOperationAction(ISD::ConstantPool, VT, Custom);
393 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
394 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
395 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
396 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
// v16i8 logical ops override the generic vector Legal setting above.
399 setOperationAction(ISD::AND, MVT::v16i8, Custom);
400 setOperationAction(ISD::OR, MVT::v16i8, Custom);
401 setOperationAction(ISD::XOR, MVT::v16i8, Custom);
402 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
404 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
406 setShiftAmountType(MVT::i32);
// SETCC produces 0 / -1 (all-ones) on SPU, matching the selb mask form.
407 setBooleanContents(ZeroOrNegativeOneBooleanContent);
409 setStackPointerRegisterToSaveRestore(SPU::R1);
411 // We have target-specific dag combine patterns for the following nodes:
412 setTargetDAGCombine(ISD::ADD);
413 setTargetDAGCombine(ISD::ZERO_EXTEND);
414 setTargetDAGCombine(ISD::SIGN_EXTEND);
415 setTargetDAGCombine(ISD::ANY_EXTEND);
417 computeRegisterProperties();
419 // Set pre-RA register scheduler default to BURR, which produces slightly
420 // better code than the default (could also be TDRR, but TargetLowering.h
421 // needs a mod to support that model):
422 setSchedulingPreference(SchedulingForRegPressure);
// Return the printable name for a SPUISD target-specific opcode, or 0 if
// the opcode is unknown. The global node_names map is filled lazily on the
// first call.
// NOTE(review): no synchronization is visible around the lazy population of
// the global map — concurrent first calls could race; confirm single-threaded
// use.
426 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
428 if (node_names.empty()) {
429 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
430 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
431 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
432 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
433 node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
434 node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
435 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
436 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
437 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
438 node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
439 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
440 node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
441 node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
442 node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
443 node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
444 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
445 node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
446 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
447 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
448 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
449 node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
450 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
451 "SPUISD::ROTBYTES_LEFT_BITS";
452 node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
453 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
454 node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
455 node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
456 node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
// Look the opcode up; unknown opcodes yield a null name.
459 std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
461 return ((i != node_names.end()) ? i->second : 0);
464 //===----------------------------------------------------------------------===//
465 // Return the Cell SPU's SETCC result type
466 //===----------------------------------------------------------------------===//
// SETCC result type: pass small integer types through unchanged; everything
// else (f32/f64, vectors, etc.) produces an i32 condition value.
468 MVT SPUTargetLowering::getSetCCResultType(MVT VT) const {
469 // i8, i16 and i32 are valid SETCC result types; others default to i32
470 return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ? VT : MVT::i32);
473 //===----------------------------------------------------------------------===//
474 // Calling convention code:
475 //===----------------------------------------------------------------------===//
477 #include "SPUGenCallingConv.inc"
479 //===----------------------------------------------------------------------===//
480 // LowerOperation implementation
481 //===----------------------------------------------------------------------===//
483 /// Custom lower loads for CellSPU
485 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
486 within a 16-byte block, we have to rotate to extract the requested element.
488 For extending loads, we also want to ensure that the following sequence is
489 emitted, e.g. for MVT::f32 extending load to MVT::f64:
493 %2 v16i8,ch = rotate %1
494 %3 v4f32, ch = bitconvert %2
495 %4 f32 = vec2prefslot %3
496 %5 f64 = fp_extend %4
// Custom-lower a scalar LOAD for CellSPU: re-emit the access as an aligned
// v16i8 quadword load, rotate the desired element into the type's preferred
// slot, extract it with VEC2PREFSLOT, apply any sext/zext/ext, and wrap the
// (value, chain) pair in a SPUISD::LDRESULT node.
500 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
501 LoadSDNode *LN = cast<LoadSDNode>(Op);
502 SDValue the_chain = LN->getChain();
503 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
504 MVT InVT = LN->getMemoryVT();
505 MVT OutVT = Op.getValueType();
506 ISD::LoadExtType ExtType = LN->getExtensionType();
507 unsigned alignment = LN->getAlignment();
// Preferred-slot info for the in-memory type. NOTE(review): vtm can be
// NULL for unmapped types (see getValueTypeMapEntry) and is dereferenced
// below without a check.
508 const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
510 switch (LN->getAddressingMode()) {
511 case ISD::UNINDEXED: {
513 SDValue basePtr = LN->getBasePtr();
// Aligned case: the quadword load needs no address masking; only the
// rotate amount must account for the offset within the 16-byte block.
516 if (alignment == 16) {
519 // Special cases for a known aligned load to simplify the base pointer
520 // and the rotation amount:
521 if (basePtr.getOpcode() == ISD::ADD
522 && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
523 // Known offset into basePtr
524 int64_t offset = CN->getSExtValue();
// Rotate by (offset within quadword) minus the preferred-slot byte.
525 int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);
530 rotate = DAG.getConstant(rotamt, MVT::i16);
532 // Simplify the base pointer for this case:
533 basePtr = basePtr.getOperand(0);
// Fold the quadword-aligned part of the offset back into the address.
534 if ((offset & ~0xf) > 0) {
535 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
537 DAG.getConstant((offset & ~0xf), PtrVT));
539 } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
540 || (basePtr.getOpcode() == SPUISD::IndirectAddr
541 && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
542 && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
543 // Plain aligned a-form address: rotate into preferred slot
544 // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
545 int64_t rotamt = -vtm->prefslot_byte;
548 rotate = DAG.getConstant(rotamt, MVT::i16);
550 // Offset the rotate amount by the basePtr and the preferred slot
552 int64_t rotamt = -vtm->prefslot_byte;
// Rotate amount depends on the runtime base pointer value.
555 rotate = DAG.getNode(ISD::ADD, PtrVT,
557 DAG.getConstant(rotamt, PtrVT));
560 // Unaligned load: must be more pessimistic about addressing modes:
561 if (basePtr.getOpcode() == ISD::ADD) {
562 MachineFunction &MF = DAG.getMachineFunction();
563 MachineRegisterInfo &RegInfo = MF.getRegInfo();
564 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
567 SDValue Op0 = basePtr.getOperand(0);
568 SDValue Op1 = basePtr.getOperand(1);
570 if (isa<ConstantSDNode>(Op1)) {
571 // Convert the (add <ptr>, <const>) to an indirect address contained
572 // in a register. Note that this is done because we need to avoid
573 // creating a 0(reg) d-form address due to the SPU's block loads.
574 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
575 the_chain = DAG.getCopyToReg(the_chain, VReg, basePtr, Flag);
576 basePtr = DAG.getCopyFromReg(the_chain, VReg, PtrVT);
578 // Convert the (add <arg1>, <arg2>) to an indirect address, which
579 // will likely be lowered as a reg(reg) x-form address.
580 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
// Non-ADD base pointer: wrap it in a zero-offset indirect address.
583 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
585 DAG.getConstant(0, PtrVT));
588 // Offset the rotate amount by the basePtr and the preferred slot
590 rotate = DAG.getNode(ISD::ADD, PtrVT,
592 DAG.getConstant(-vtm->prefslot_byte, PtrVT));
595 // Re-emit as a v16i8 vector load
596 result = DAG.getLoad(MVT::v16i8, the_chain, basePtr,
597 LN->getSrcValue(), LN->getSrcValueOffset(),
598 LN->isVolatile(), 16);
// Carry the new load's output chain forward.
601 the_chain = result.getValue(1);
603 // Rotate into the preferred slot:
604 result = DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v16i8,
605 result.getValue(0), rotate);
607 // Convert the loaded v16i8 vector to the appropriate vector type
608 // specified by the operand:
609 MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
610 result = DAG.getNode(SPUISD::VEC2PREFSLOT, InVT,
611 DAG.getNode(ISD::BIT_CONVERT, vecVT, result));
613 // Handle extending loads by extending the scalar result:
614 if (ExtType == ISD::SEXTLOAD) {
615 result = DAG.getNode(ISD::SIGN_EXTEND, OutVT, result);
616 } else if (ExtType == ISD::ZEXTLOAD) {
617 result = DAG.getNode(ISD::ZERO_EXTEND, OutVT, result);
618 } else if (ExtType == ISD::EXTLOAD) {
619 unsigned NewOpc = ISD::ANY_EXTEND;
// Anyext of an FP result must be an FP_EXTEND instead.
621 if (OutVT.isFloatingPoint())
622 NewOpc = ISD::FP_EXTEND;
624 result = DAG.getNode(NewOpc, OutVT, result);
// Package (value, chain) into LDRESULT so callers see both results.
627 SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
628 SDValue retops[2] = {
633 result = DAG.getNode(SPUISD::LDRESULT, retvts,
634 retops, sizeof(retops) / sizeof(retops[0]));
// Indexed addressing modes are not supported; report and fall through.
641 case ISD::LAST_INDEXED_MODE:
642 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
644 cerr << (unsigned) LN->getAddressingMode() << "\n";
652 /// Custom lower stores for CellSPU
654 All CellSPU stores are aligned to 16-byte boundaries, so for elements
655 within a 16-byte block, we have to generate a shuffle to insert the
656 requested element into its place, then store the resulting block.
// Custom-lower a scalar STORE for CellSPU: load the enclosing 16-byte
// quadword, build a SHUFFLE_MASK for the insertion byte, splice the scalar
// into the quadword with SHUFB, and store the whole quadword back.
659 LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
660 StoreSDNode *SN = cast<StoreSDNode>(Op);
661 SDValue Value = SN->getValue();
662 MVT VT = Value.getValueType();
// For truncating stores, StVT is the narrower in-memory type.
663 MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
664 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
665 unsigned alignment = SN->getAlignment();
667 switch (SN->getAddressingMode()) {
668 case ISD::UNINDEXED: {
669 // The vector type we really want to load from the 16-byte chunk.
670 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
671 stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
673 SDValue alignLoadVec;
674 SDValue basePtr = SN->getBasePtr();
675 SDValue the_chain = SN->getChain();
676 SDValue insertEltOffs;
// Aligned case: base pointer can be used as-is for the quadword access.
678 if (alignment == 16) {
681 // Special cases for a known aligned load to simplify the base pointer
682 // and insertion byte:
683 if (basePtr.getOpcode() == ISD::ADD
684 && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
685 // Known offset into basePtr
686 int64_t offset = CN->getSExtValue();
688 // Simplify the base pointer for this case:
689 basePtr = basePtr.getOperand(0);
// Insertion byte is the offset within the 16-byte quadword.
690 insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
692 DAG.getConstant((offset & 0xf), PtrVT));
// Fold the quadword-aligned part of the offset into the address.
694 if ((offset & ~0xf) > 0) {
695 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
697 DAG.getConstant((offset & ~0xf), PtrVT));
700 // Otherwise, assume it's at byte 0 of basePtr
701 insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
703 DAG.getConstant(0, PtrVT));
706 // Unaligned load: must be more pessimistic about addressing modes:
707 if (basePtr.getOpcode() == ISD::ADD) {
708 MachineFunction &MF = DAG.getMachineFunction();
709 MachineRegisterInfo &RegInfo = MF.getRegInfo();
710 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
713 SDValue Op0 = basePtr.getOperand(0);
714 SDValue Op1 = basePtr.getOperand(1);
716 if (isa<ConstantSDNode>(Op1)) {
717 // Convert the (add <ptr>, <const>) to an indirect address contained
718 // in a register. Note that this is done because we need to avoid
719 // creating a 0(reg) d-form address due to the SPU's block loads.
720 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
721 the_chain = DAG.getCopyToReg(the_chain, VReg, basePtr, Flag);
722 basePtr = DAG.getCopyFromReg(the_chain, VReg, PtrVT);
724 // Convert the (add <arg1>, <arg2>) to an indirect address, which
725 // will likely be lowered as a reg(reg) x-form address.
726 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
// Non-ADD base pointer: wrap it in a zero-offset indirect address.
729 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
731 DAG.getConstant(0, PtrVT));
734 // Insertion point is solely determined by basePtr's contents
735 insertEltOffs = DAG.getNode(ISD::ADD, PtrVT,
737 DAG.getConstant(0, PtrVT));
740 // Re-emit as a v16i8 vector load
741 alignLoadVec = DAG.getLoad(MVT::v16i8, the_chain, basePtr,
742 SN->getSrcValue(), SN->getSrcValueOffset(),
743 SN->isVolatile(), 16);
// Carry the quadword load's chain forward.
746 the_chain = alignLoadVec.getValue(1);
748 LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
749 SDValue theValue = SN->getValue();
// Peel off AssertZext/AssertSext wrappers so the raw value is inserted.
753 && (theValue.getOpcode() == ISD::AssertZext
754 || theValue.getOpcode() == ISD::AssertSext)) {
755 // Drill down and get the value for zero- and sign-extended
757 theValue = theValue.getOperand(0);
760 // If the base pointer is already a D-form address, then just create
761 // a new D-form address with a slot offset and the original base pointer.
762 // Otherwise generate a D-form address with the slot offset relative
763 // to the stack pointer, which is always aligned.
765 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
766 cerr << "CellSPU LowerSTORE: basePtr = ";
767 basePtr.getNode()->dump(&DAG);
// Build the byte-insertion mask and splat the scalar into a vector...
772 SDValue insertEltOp =
773 DAG.getNode(SPUISD::SHUFFLE_MASK, vecVT, insertEltOffs);
774 SDValue vectorizeOp =
775 DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue);
// ...then shuffle the scalar into the loaded quadword.
777 result = DAG.getNode(SPUISD::SHUFB, vecVT,
778 vectorizeOp, alignLoadVec,
779 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, insertEltOp));
// Store the merged quadword back to memory.
781 result = DAG.getStore(the_chain, result, basePtr,
782 LN->getSrcValue(), LN->getSrcValueOffset(),
783 LN->isVolatile(), LN->getAlignment());
// Disabled debug dump. NOTE(review): "¤tRoot" below is mojibake for
// "&currentRoot" (an "&curren" HTML-entity mangle) — dead under #if 0,
// but should be repaired if this block is ever re-enabled.
785 #if 0 && !defined(NDEBUG)
786 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
787 const SDValue ¤tRoot = DAG.getRoot();
790 cerr << "------- CellSPU:LowerStore result:\n";
793 DAG.setRoot(currentRoot);
// Indexed addressing modes are not supported; report and fall through.
// NOTE(review): message says "LowerLOAD"/"LoadSDNode" but this is the
// store path — likely copy-paste; should read "LowerSTORE"/"StoreSDNode".
804 case ISD::LAST_INDEXED_MODE:
805 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
807 cerr << (unsigned) SN->getAddressingMode() << "\n";
815 //! Generate the address of a constant pool entry.
// Lower a ConstantPool node to an SPU address node: an A-form address for
// small-memory static code, or a Hi/Lo pair wrapped in IndirectAddr when
// large memory is in use. Non-static relocation models are rejected (see
// the message fragment at the end).
817 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
818 MVT PtrVT = Op.getValueType();
819 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
820 Constant *C = CP->getConstVal();
821 SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
822 SDValue Zero = DAG.getConstant(0, PtrVT);
823 const TargetMachine &TM = DAG.getTarget();
825 if (TM.getRelocationModel() == Reloc::Static) {
826 if (!ST->usingLargeMem()) {
827 // Just return the SDValue with the constant pool address in it.
828 return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
// Large-memory model: materialize as hi/lo halves.
830 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
831 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
832 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
837 "LowerConstantPool: Relocation model other than static"
842 //! Alternate entry point for generating the address of a constant pool entry
// Public wrapper: forwards to the file-local LowerConstantPool using the
// target machine's subtarget.
844 SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) {
845 return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
// Lower a JumpTable node, mirroring LowerConstantPool: A-form address for
// small memory, Hi/Lo + IndirectAddr for large memory; static relocation
// model only (see the assertion message fragment at the end).
849 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
850 MVT PtrVT = Op.getValueType();
851 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
852 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
853 SDValue Zero = DAG.getConstant(0, PtrVT);
854 const TargetMachine &TM = DAG.getTarget();
856 if (TM.getRelocationModel() == Reloc::Static) {
857 if (!ST->usingLargeMem()) {
858 return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
// Large-memory model: materialize as hi/lo halves.
860 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
861 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
862 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
867 "LowerJumpTable: Relocation model other than static not supported.");
// Lower a GlobalAddress node, same scheme as LowerConstantPool/
// LowerJumpTable: A-form for small memory, Hi/Lo + IndirectAddr for large
// memory; non-static relocation models are reported on cerr.
872 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
873 MVT PtrVT = Op.getValueType();
874 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
875 GlobalValue *GV = GSDN->getGlobal();
876 SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
877 const TargetMachine &TM = DAG.getTarget();
878 SDValue Zero = DAG.getConstant(0, PtrVT);
880 if (TM.getRelocationModel() == Reloc::Static) {
881 if (!ST->usingLargeMem()) {
882 return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
// Large-memory model: materialize as hi/lo halves.
884 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
885 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
886 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
889 cerr << "LowerGlobalAddress: Relocation model other than static not "
898 //! Custom lower double precision floating point constants
// Custom-lower an f64 ConstantFP: materialize the double's bit pattern as
// an i64 constant, splat it into a v2i64 BUILD_VECTOR, bitcast to v2f64,
// and extract the preferred slot.
900 LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
901 MVT VT = Op.getValueType();
// Only f64 is handled here (f32 ConstantFP is Legal — see the ctor).
903 if (VT == MVT::f64) {
904 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
907 "LowerConstantFP: Node is not ConstantFPSDNode");
// Reinterpret the double as its raw 64-bit pattern.
909 uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
910 SDValue T = DAG.getConstant(dbits, MVT::i64);
911 SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T);
912 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
913 DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, Tvec));
// Lowers incoming formal arguments for an SPU function: the first
// NumArgRegs arguments arrive in the SPU argument registers (copied into
// fresh virtual registers here); any overflow arguments are loaded from
// fixed stack slots of StackSlotSize bytes starting at minStackSize().
// For varargs functions, the remaining unused argument registers are
// spilled to the stack so va_arg can walk them; VarArgsFrameIndex is set
// to the first such slot.  Returns a MERGE_VALUES node of all argument
// values plus the updated chain.
// NOTE(review): many interior lines are elided here (switch case labels,
// the vararg 'if' header, closing braces) — the visible lines show the
// structure but not every statement.
920 LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
922 MachineFunction &MF = DAG.getMachineFunction();
923 MachineFrameInfo *MFI = MF.getFrameInfo();
924 MachineRegisterInfo &RegInfo = MF.getRegInfo();
925 SmallVector<SDValue, 48> ArgValues;
926 SDValue Root = Op.getOperand(0);
927 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
929 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
930 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
// First argument slot lives just past the ABI-mandated linkage area.
932 unsigned ArgOffset = SPUFrameInfo::minStackSize();
933 unsigned ArgRegIdx = 0;
934 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
936 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
938 // Add DAG nodes to load the arguments or copy them out of registers.
939 for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
940 ArgNo != e; ++ArgNo) {
941 MVT ObjectVT = Op.getValue(ArgNo).getValueType();
942 unsigned ObjSize = ObjectVT.getSizeInBits()/8;
945 if (ArgRegIdx < NumArgRegs) {
946 const TargetRegisterClass *ArgRegClass;
// Pick the register class matching the argument's value type.
// (case labels for each MVT are elided in this extract)
948 switch (ObjectVT.getSimpleVT()) {
950 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
951 << ObjectVT.getMVTString()
956 ArgRegClass = &SPU::R8CRegClass;
959 ArgRegClass = &SPU::R16CRegClass;
962 ArgRegClass = &SPU::R32CRegClass;
965 ArgRegClass = &SPU::R64CRegClass;
968 ArgRegClass = &SPU::GPRCRegClass;
971 ArgRegClass = &SPU::R32FPRegClass;
974 ArgRegClass = &SPU::R64FPRegClass;
982 ArgRegClass = &SPU::VECREGRegClass;
// Bind the physical argument register to a new vreg and read it.
986 unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
987 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
988 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
991 // We need to load the argument to a virtual register if we determined
992 // above that we ran out of physical registers of the appropriate type
993 // or we're forced to do vararg
994 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
995 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
996 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
997 ArgOffset += StackSlotSize;
1000 ArgValues.push_back(ArgVal);
// Chain the next copy/load after this one.
1002 Root = ArgVal.getOperand(0);
1007 // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
1008 // We will spill (79-3)+1 registers to the stack
1009 SmallVector<SDValue, 79-3+1> MemOps;
1011 // Create the frame slot
// Varargs: spill every remaining argument register (as a v16i8 quadword)
// into consecutive fixed stack slots.  VarArgsFrameIndex ends up pointing
// at the last created slot in this visible code — verify intent against
// the full file (one might expect the first slot).
1013 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1014 VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
1015 SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1016 SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
1017 SDValue Store = DAG.getStore(Root, ArgVal, FIN, NULL, 0);
1018 Root = Store.getOperand(0);
1019 MemOps.push_back(Store);
1021 // Increment address by stack slot size for the next stored argument
1022 ArgOffset += StackSlotSize;
// Tie all the spill stores together into a single chain.
1024 if (!MemOps.empty())
1025 Root = DAG.getNode(ISD::TokenFactor,MVT::Other,&MemOps[0],MemOps.size());
1028 ArgValues.push_back(Root);
1030 // Return the new list of results.
1031 return DAG.getNode(ISD::MERGE_VALUES, Op.getNode()->getVTList(),
1032 &ArgValues[0], ArgValues.size());
1035 /// isLSAAddress - Return the immediate to use if the specified
1036 /// value is representable as a LSA address.
// Returns the word-offset constant node (Addr >> 2) if Op is a constant
// that is 4-byte aligned and fits in a sign-extended 18-bit field;
// otherwise returns 0 (null).  The null-check on C is elided from this
// extract (original lines 1039-1040) — presumably `if (!C) return 0;`.
1037 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1038 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1041 int Addr = C->getZExtValue();
1042 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
// NOTE(review): `Addr << 14 >> 14` relies on arithmetic right shift of a
// signed int to test that the top 14 bits are a sign-extension; signed
// left-shift overflow is technically UB — common LLVM-era idiom, though.
1043 (Addr << 14 >> 14) != Addr)
1044 return 0; // Top 14 bits have to be sext of immediate.
1046 return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
// Lowers an outgoing call: marshals arguments into SPU argument registers
// (overflow goes to stack slots), resolves the callee into an A-form,
// PC-relative, or indirect address depending on the relocation/memory
// model, emits the SPUISD::CALL node bracketed by CALLSEQ_START/END, and
// copies return values out of R3 (and R4 for the two-register i32 pair
// case).  Returns the merged result values.
// NOTE(review): this extract elides many interior lines — switch case
// labels for both the argument-marshalling switch and the return-value
// switch, several closing braces, and the `NumResults` increments — so
// comments below describe only what the visible lines establish.
1050 LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1051 CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
1052 SDValue Chain = TheCall->getChain();
1053 SDValue Callee = TheCall->getCallee();
1054 unsigned NumOps = TheCall->getNumArgs();
1055 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1056 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1057 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1059 // Handy pointer type
1060 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1062 // Accumulate how many bytes are to be pushed on the stack, including the
1063 // linkage area, and parameter passing area. According to the SPU ABI,
1064 // we minimally need space for [LR] and [SP]
1065 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1067 // Set up a copy of the stack pointer for use loading and storing any
1068 // arguments that may not fit in the registers available for argument
1070 SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1072 // Figure out which arguments are going to go in registers, and which in
1074 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1075 unsigned ArgRegIdx = 0;
1077 // Keep track of registers passing arguments
1078 std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1079 // And the arguments passed on the stack
1080 SmallVector<SDValue, 8> MemOpChains;
1082 for (unsigned i = 0; i != NumOps; ++i) {
1083 SDValue Arg = TheCall->getArg(i);
1085 // PtrOff will be used to store the current argument to the stack if a
1086 // register cannot be found for it.
1087 SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1088 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
// Each visible branch below is one group of case labels (elided): use the
// next argument register if any remain, else store to the stack slot.
1090 switch (Arg.getValueType().getSimpleVT()) {
1091 default: assert(0 && "Unexpected ValueType for argument!");
1097 if (ArgRegIdx != NumArgRegs) {
1098 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1100 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1101 ArgOffset += StackSlotSize;
1106 if (ArgRegIdx != NumArgRegs) {
1107 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1109 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1110 ArgOffset += StackSlotSize;
1119 if (ArgRegIdx != NumArgRegs) {
1120 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1122 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1123 ArgOffset += StackSlotSize;
1129 // Update number of stack bytes actually used, insert a call sequence start
1130 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1131 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
1134 if (!MemOpChains.empty()) {
1135 // Adjust the stack pointer for the stack arguments.
1136 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1137 &MemOpChains[0], MemOpChains.size());
1140 // Build a sequence of copy-to-reg nodes chained together with token chain
1141 // and flag operands which copy the outgoing args into the appropriate regs.
1143 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1144 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1146 InFlag = Chain.getValue(1);
1149 SmallVector<SDValue, 8> Ops;
1150 unsigned CallOpc = SPUISD::CALL;
1152 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1153 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1154 // node so that legalize doesn't hack it.
1155 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1156 GlobalValue *GV = G->getGlobal();
1157 MVT CalleeVT = Callee.getValueType();
1158 SDValue Zero = DAG.getConstant(0, PtrVT);
1159 SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1161 if (!ST->usingLargeMem()) {
1162 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1163 // style calls, otherwise, external symbols are BRASL calls. This assumes
1164 // that declared/defined symbols are in the same compilation unit and can
1165 // be reached through PC-relative jumps.
1168 // This may be an unsafe assumption for JIT and really large compilation
1170 if (GV->isDeclaration()) {
// Declaration only: absolute (A-form) address, reached via BRASL.
1171 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
// Defined in this unit: PC-relative address, reached via BRSL.
1173 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1176 // "Large memory" mode: Turn all calls into indirect calls with a X-form
1178 Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
1180 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1181 MVT CalleeVT = Callee.getValueType();
1182 SDValue Zero = DAG.getConstant(0, PtrVT);
1183 SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
1184 Callee.getValueType());
1186 if (!ST->usingLargeMem()) {
1187 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, ExtSym, Zero);
1189 Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, ExtSym, Zero);
1191 } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1192 // If this is an absolute destination address that appears to be a legal
1193 // local store address, use the munged value.
1194 Callee = SDValue(Dest, 0);
1197 Ops.push_back(Chain);
1198 Ops.push_back(Callee);
1200 // Add argument registers to the end of the list so that they are known live
1202 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1203 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1204 RegsToPass[i].second.getValueType()));
1206 if (InFlag.getNode())
1207 Ops.push_back(InFlag);
1208 // Returns a chain and a flag for retval copy to use.
1209 Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag),
1210 &Ops[0], Ops.size());
1211 InFlag = Chain.getValue(1);
1213 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
1214 DAG.getIntPtrConstant(0, true), InFlag);
1215 if (TheCall->getValueType(0) != MVT::Other)
1216 InFlag = Chain.getValue(1);
1218 SDValue ResultVals[3];
1219 unsigned NumResults = 0;
1221 // If the call has results, copy the values out of the ret val registers.
// Case labels and NumResults updates are elided below; each visible
// branch copies a return value out of SPU::R3 (plus R4 for the paired
// i32 result case).
1222 switch (TheCall->getValueType(0).getSimpleVT()) {
1223 default: assert(0 && "Unexpected ret value!");
1224 case MVT::Other: break;
1226 if (TheCall->getValueType(1) == MVT::i32) {
1227 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1228 ResultVals[0] = Chain.getValue(0);
1229 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1230 Chain.getValue(2)).getValue(1);
1231 ResultVals[1] = Chain.getValue(0);
1234 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1235 ResultVals[0] = Chain.getValue(0);
1240 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1241 ResultVals[0] = Chain.getValue(0);
1245 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i128, InFlag).getValue(1);
1246 ResultVals[0] = Chain.getValue(0);
1251 Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1252 InFlag).getValue(1);
1253 ResultVals[0] = Chain.getValue(0);
1262 Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1263 InFlag).getValue(1);
1264 ResultVals[0] = Chain.getValue(0);
1269 // If the function returns void, just return the chain.
1270 if (NumResults == 0)
1273 // Otherwise, merge everything together with a MERGE_VALUES node.
1274 ResultVals[NumResults++] = Chain;
1275 SDValue Res = DAG.getMergeValues(ResultVals, NumResults);
1276 return Res.getValue(Op.getResNo());
// Lowers a RET node: runs the SPU return calling convention over the
// return operands, records the return registers as function live-outs
// (first time only), copies each return value into its assigned physical
// register glued together with Flag, and emits SPUISD::RET_FLAG — with
// the flag operand when values were copied, without it otherwise.
1280 LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
1281 SmallVector<CCValAssign, 16> RVLocs;
1282 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1283 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1284 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1285 CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);
1287 // If this is the first return lowered for this function, add the regs to the
1288 // liveout set for the function.
1289 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1290 for (unsigned i = 0; i != RVLocs.size(); ++i)
1291 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1294 SDValue Chain = Op.getOperand(0);
1297 // Copy the result values into the output registers.
1298 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1299 CCValAssign &VA = RVLocs[i];
1300 assert(VA.isRegLoc() && "Can only return in registers!");
// RET operands alternate (chain, val0, flag0, val1, flag1, ...): the
// i-th return value sits at operand i*2+1.
1301 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1302 Flag = Chain.getValue(1);
// (The `if (Flag.getNode())` guard selecting between these two returns is
// elided from this extract.)
1306 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1308 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1312 //===----------------------------------------------------------------------===//
1313 // Vector related lowering:
1314 //===----------------------------------------------------------------------===//
// Returns the single ConstantSDNode that every non-undef element of the
// BUILD_VECTOR N shares, or 0 if the elements differ, a non-constant
// element is found, or all elements are undef.
1316 static ConstantSDNode *
1317 getVecImm(SDNode *N) {
1318 SDValue OpVal(0, 0);
1320 // Check to see if this buildvec has a single non-undef value in its elements.
1321 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1322 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1323 if (OpVal.getNode() == 0)
1324 OpVal = N->getOperand(i);
1325 else if (OpVal != N->getOperand(i))
// (the `return 0;` for mismatching elements is elided here)
1329 if (OpVal.getNode() != 0) {
1330 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
// (returns CN on success — elided)
1335 return 0; // All UNDEF: use implicit def.; not Constant node
1338 /// get_vec_i18imm - Test if this vector is a vector filled with the same value
1339 /// and the value fits into an unsigned 18-bit constant, and if so, return the
// Returns the splat value as a target constant when it fits in u18, or an
// empty SDValue otherwise.  For i64 splats, the upper and lower 32-bit
// halves must match (check elided at original lines 1349-1350) before the
// value is reduced to its 32-bit half.
1341 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1343 if (ConstantSDNode *CN = getVecImm(N)) {
1344 uint64_t Value = CN->getZExtValue();
1345 if (ValueType == MVT::i64) {
1346 uint64_t UValue = CN->getZExtValue();
1347 uint32_t upper = uint32_t(UValue >> 32);
1348 uint32_t lower = uint32_t(UValue);
1351 Value = Value >> 32;
// 0x3ffff = largest unsigned 18-bit immediate.
1353 if (Value <= 0x3ffff)
1354 return DAG.getTargetConstant(Value, ValueType);
1360 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1361 /// and the value fits into a signed 16-bit constant, and if so, return the
// Returns the splat value as a target constant when it fits in s16, or an
// empty SDValue otherwise.  Mirrors get_vec_u18imm: i64 splats require
// matching 32-bit halves (comparison elided in this extract) before
// collapsing to one half.
1363 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1365 if (ConstantSDNode *CN = getVecImm(N)) {
1366 int64_t Value = CN->getSExtValue();
1367 if (ValueType == MVT::i64) {
1368 uint64_t UValue = CN->getZExtValue();
1369 uint32_t upper = uint32_t(UValue >> 32);
1370 uint32_t lower = uint32_t(UValue);
1373 Value = Value >> 32;
// [-32768, 32767]: signed 16-bit range.
1375 if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1376 return DAG.getTargetConstant(Value, ValueType);
1383 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1384 /// and the value fits into a signed 10-bit constant, and if so, return the
// Returns the splat value as a target constant when isS10Constant accepts
// it, or an empty SDValue otherwise.  Same i64 half-matching structure as
// the u18/i16 variants (comparison elided in this extract).
1386 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1388 if (ConstantSDNode *CN = getVecImm(N)) {
1389 int64_t Value = CN->getSExtValue();
1390 if (ValueType == MVT::i64) {
1391 uint64_t UValue = CN->getZExtValue();
1392 uint32_t upper = uint32_t(UValue >> 32);
1393 uint32_t lower = uint32_t(UValue);
1396 Value = Value >> 32;
1398 if (isS10Constant(Value))
1399 return DAG.getTargetConstant(Value, ValueType);
1405 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1406 /// and the value fits into a signed 8-bit constant, and if so, return the
1409 /// @note: The incoming vector is v16i8 because that's the only way we can load
1410 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
// For i16: accept only when the high byte equals the low byte (a byte
// splat widened to 16 bits) and return the low byte.  For i8: accept when
// the value already fits in 8 bits.  Otherwise returns an empty SDValue.
1412 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1414 if (ConstantSDNode *CN = getVecImm(N)) {
1415 int Value = (int) CN->getZExtValue();
1416 if (ValueType == MVT::i16
1417 && Value <= 0xffff /* truncated from uint64_t */
1418 && ((short) Value >> 8) == ((short) Value & 0xff))
1419 return DAG.getTargetConstant(Value & 0xff, ValueType);
1420 else if (ValueType == MVT::i8
1421 && (Value & 0xff) == Value)
1422 return DAG.getTargetConstant(Value, ValueType);
1428 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1429 /// and the value fits into a signed 16-bit constant, and if so, return the
// Accepts splat values whose low 16 bits are zero (i.e. representable by
// ILHU, "immediate load halfword upper") and returns the value shifted
// right 16; otherwise returns an empty SDValue.
1431 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1433 if (ConstantSDNode *CN = getVecImm(N)) {
1434 uint64_t Value = CN->getZExtValue();
1435 if ((ValueType == MVT::i32
1436 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1437 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1438 return DAG.getTargetConstant(Value >> 16, ValueType);
1444 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
// Returns the splat value as an i32 target constant, or an empty SDValue
// when N is not a uniform constant vector.
1445 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1446 if (ConstantSDNode *CN = getVecImm(N)) {
1447 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
1453 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
// Returns the splat value as an i64 target constant, or an empty SDValue
// when N is not a uniform constant vector.
// NOTE(review): the `(unsigned)` cast truncates the 64-bit splat value to
// its low 32 bits before rebuilding an i64 constant — this looks like it
// silently drops the upper half for genuine 64-bit splats.  Confirm
// against callers / upstream history before relying on it.
1454 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1455 if (ConstantSDNode *CN = getVecImm(N)) {
1456 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
1462 // If this is a vector of constants or undefs, get the bits. A bit in
1463 // UndefBits is set if the corresponding element of the vector is an
1464 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1465 // zero. Return true if this is not an array of constants, false if it is.
// Packs the 128-bit BUILD_VECTOR BV into two uint64_t words (big-end
// element first within each word, per the SlotNo computation) and records
// undef element positions in UndefBits.  Integer and FP constant elements
// are both handled; any other element kind makes the function return true
// ("not constant") — that early return is elided in this extract.
1467 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1468 uint64_t UndefBits[2]) {
1469 // Start with zero'd results.
1470 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1472 unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
1473 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1474 SDValue OpVal = BV->getOperand(i);
1476 unsigned PartNo = i >= e/2; // In the upper 128 bits?
1477 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1479 uint64_t EltBits = 0;
1480 if (OpVal.getOpcode() == ISD::UNDEF) {
// Mark every bit of this element's slot as undef; leave VectorBits zero.
1481 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1482 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1484 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1485 EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
1486 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
// FP elements contribute their raw bit pattern.
1487 const APFloat &apf = CN->getValueAPF();
1488 EltBits = (CN->getValueType(0) == MVT::f32
1489 ? FloatToBits(apf.convertToFloat())
1490 : DoubleToBits(apf.convertToDouble()));
1492 // Nonconstant element.
1496 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1499 //printf("%llx %llx %llx %llx\n",
1500 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1504 /// If this is a splat (repetition) of a value across the whole vector, return
1505 /// the smallest size that splats it. For example, "0x01010101010101..." is a
1506 /// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1507 /// SplatSize = 1 byte.
// Works down from 128 to 8 bits: at each level, fold the value in half
// (OR the halves of Bits, AND the halves of Undef so a bit is undef only
// if undef in both halves) and check the halves agree modulo undefs.
// MinSplatBits bounds how small the splat may get; SplatBits/SplatUndef/
// SplatSize (assignments partially elided here) report the result.
// Returns true on success, false when the two 64-bit pieces differ.
1508 static bool isConstantSplat(const uint64_t Bits128[2],
1509 const uint64_t Undef128[2],
1511 uint64_t &SplatBits, uint64_t &SplatUndef,
1513 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1514 // the same as the lower 64-bits, ignoring undefs.
1515 uint64_t Bits64 = Bits128[0] | Bits128[1];
1516 uint64_t Undef64 = Undef128[0] & Undef128[1];
1517 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1518 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1519 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1520 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
1522 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1523 if (MinSplatBits < 64) {
1525 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1527 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1528 if (MinSplatBits < 32) {
1530 // If the top 16-bits are different than the lower 16-bits, ignoring
1531 // undefs, we have an i32 splat.
1532 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1533 if (MinSplatBits < 16) {
1534 // If the top 8-bits are different than the lower 8-bits, ignoring
1535 // undefs, we have an i16 splat.
1536 if ((Bits16 & (uint16_t(~Undef16) >> 8))
1537 == ((Bits16 >> 8) & ~Undef16)) {
1538 // Otherwise, we have an 8-bit splat.
1539 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1540 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
// (SplatSize assignments and the corresponding SplatBits stores for the
// 16/32/64-bit cases are elided from this extract.)
1546 SplatUndef = Undef16;
1553 SplatUndef = Undef32;
1559 SplatBits = Bits128[0];
1560 SplatUndef = Undef128[0];
1566 return false; // Can't be a splat if two pieces don't match.
1569 //! Lower a BUILD_VECTOR instruction creatively:
// Lowers constant-splat BUILD_VECTORs: extracts the packed bits via
// GetConstantBuildVectorBits, verifies the vector is a splat via
// isConstantSplat, then per element type materializes the splat as the
// cheapest equivalent integer BUILD_VECTOR (bitcast back for FP types).
// Non-constant / non-splat vectors return an empty SDValue so generic
// legalization handles them.  Case labels in the switch are elided in
// this extract.
1571 LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1572 MVT VT = Op.getValueType();
1573 // If this is a vector of constants or undefs, get the bits. A bit in
1574 // UndefBits is set if the corresponding element of the vector is an
1575 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1577 uint64_t VectorBits[2];
1578 uint64_t UndefBits[2];
1579 uint64_t SplatBits, SplatUndef;
1581 if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
1582 || !isConstantSplat(VectorBits, UndefBits,
1583 VT.getVectorElementType().getSizeInBits(),
1584 SplatBits, SplatUndef, SplatSize))
1585 return SDValue(); // Not a constant vector, not a splat.
1587 switch (VT.getSimpleVT()) {
1589 cerr << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = "
1590 << VT.getMVTString()
// v4f32: splat the 32-bit pattern as v4i32, then bitcast.
1595 uint32_t Value32 = uint32_t(SplatBits);
1596 assert(SplatSize == 4
1597 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1598 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1599 SDValue T = DAG.getConstant(Value32, MVT::i32);
1600 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1601 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
// v2f64: same trick with a 64-bit pattern via v2i64.
1605 uint64_t f64val = uint64_t(SplatBits);
1606 assert(SplatSize == 8
1607 && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
1608 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1609 SDValue T = DAG.getConstant(f64val, MVT::i64);
1610 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1611 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
// v16i8: widen the byte splat to halfwords, build v8i16, bitcast back.
1615 // 8-bit constants have to be expanded to 16-bits
1616 unsigned short Value16 = SplatBits | (SplatBits << 8);
1618 for (int i = 0; i < 8; ++i)
1619 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1620 return DAG.getNode(ISD::BIT_CONVERT, VT,
1621 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
// v8i16: use the halfword splat directly (duplicating a byte splat if
// SplatSize was 1 — the size test itself is elided here).
1624 unsigned short Value16;
1626 Value16 = (unsigned short) (SplatBits & 0xffff);
1628 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1629 SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
1631 for (int i = 0; i < 8; ++i) Ops[i] = T;
1632 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1635 unsigned int Value = SplatBits;
1636 SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
1637 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
1640 unsigned int Value = SplatBits;
1641 SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
1642 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T);
// v2i64 splats need the full shuffle-based expansion below.
1645 return SPU::LowerSplat_v2i64(VT, DAG, SplatBits);
// Materializes a v2i64 splat of SplatVal.  Fast path: if the upper and
// lower 32-bit halves are equal, a v4i32 splat bitcast to v2i64 suffices
// (matchable by IL/ILA etc.).  Otherwise build each half as a v4i32 splat
// — unless a half is a "special" pattern (0, all-ones, or sign-bit-only)
// that the shuffle mask can synthesize directly — and combine the halves
// with a SHUFB whose control word selects/creates the right bytes.
1653 SPU::LowerSplat_v2i64(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal) {
1654 uint32_t upper = uint32_t(SplatVal >> 32);
1655 uint32_t lower = uint32_t(SplatVal);
1657 if (upper == lower) {
1658 // Magic constant that can be matched by IL, ILA, et. al.
1659 SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
1660 return DAG.getNode(ISD::BIT_CONVERT, OpVT,
1661 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1662 Val, Val, Val, Val));
1666 SmallVector<SDValue, 16> ShufBytes;
1668 bool upper_special, lower_special;
1670 // NOTE: This code creates common-case shuffle masks that can be easily
1671 // detected as common expressions. It is not attempting to create highly
1672 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1674 // Detect if the upper or lower half is a special shuffle mask pattern:
1675 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1676 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1678 // Create lower vector if not a special pattern
1679 if (!lower_special) {
1680 SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1681 LO32 = DAG.getNode(ISD::BIT_CONVERT, OpVT,
1682 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1683 LO32C, LO32C, LO32C, LO32C));
1686 // Create upper vector if not a special pattern
1687 if (!upper_special) {
1688 SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1689 HI32 = DAG.getNode(ISD::BIT_CONVERT, OpVT,
1690 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1691 HI32C, HI32C, HI32C, HI32C));
1694 // If either upper or lower are special, then the two input operands are
1695 // the same (basically, one of them is a "don't care")
// (the aliasing assignments for the one-special cases, original lines
// ~1696-1699, are elided from this extract)
1700 if (lower_special && upper_special) {
1701 // Unhappy situation... both upper and lower are special, so punt with
1702 // a target constant:
1703 SDValue Zero = DAG.getConstant(0, MVT::i32);
1704 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
// Build the 16-byte SHUFB control word, one 32-bit word (4 bytes) at a
// time.  Even words pull from the upper half, odd words from the lower;
// special halves are encoded with SHUFB's generator bytes (0x80 => 0x00,
// 0xc0 => 0xff, 0xe0 => 0x80).
1708 for (int i = 0; i < 4; ++i) {
1710 for (int j = 0; j < 4; ++j) {
1712 bool process_upper, process_lower;
1714 process_upper = (upper_special && (i & 1) == 0);
1715 process_lower = (lower_special && (i & 1) == 1);
1717 if (process_upper || process_lower) {
1718 if ((process_upper && upper == 0)
1719 || (process_lower && lower == 0))
1721 else if ((process_upper && upper == 0xffffffff)
1722 || (process_lower && lower == 0xffffffff))
1724 else if ((process_upper && upper == 0x80000000)
1725 || (process_lower && lower == 0x80000000))
1726 val |= (j == 0 ? 0xe0 : 0x80);
1728 val |= i * 4 + j + ((i & 1) * 16);
1731 ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1734 return DAG.getNode(SPUISD::SHUFB, OpVT, HI32, LO32,
1735 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1736 &ShufBytes[0], ShufBytes.size()));
1740 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1741 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1742 /// permutation vector, V3, is monotonically increasing with one "exception"
1743 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1744 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1745 /// In either case, the net result is going to eventually invoke SHUFB to
1746 /// permute/shuffle the bytes from V1 and V2.
1748 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate
1749 /// control word for byte/halfword/word insertion. This takes care of a single
1750 /// element move from V2 into V1.
1752 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
// NOTE(review): several interior lines (V2EltIdx0 assignments per element
// type, rotate bookkeeping, closing braces) are elided from this extract.
1753 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1754 SDValue V1 = Op.getOperand(0);
1755 SDValue V2 = Op.getOperand(1);
1756 SDValue PermMask = Op.getOperand(2);
1758 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1760 // If we have a single element being moved from V1 to V2, this can be handled
1761 // using the C*[DX] compute mask instructions, but the vector elements have
1762 // to be monotonically increasing with one exception element.
1763 MVT VecVT = V1.getValueType();
1764 MVT EltVT = VecVT.getVectorElementType();
1765 unsigned EltsFromV2 = 0;
1767 unsigned V2EltIdx0 = 0;
1768 unsigned CurrElt = 0;
1769 unsigned MaxElts = VecVT.getVectorNumElements();
1770 unsigned PrevElt = 0;
1772 bool monotonic = true;
// Select the index at which mask entries start referring to V2
// (the assignments inside each branch are elided in this extract).
1775 if (EltVT == MVT::i8) {
1777 } else if (EltVT == MVT::i16) {
1779 } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
1781 } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
1784 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
// Scan the permutation mask: count V2-sourced elements, check the
// V1-sourced ones increase monotonically, and detect a pure rotation.
1786 for (unsigned i = 0; i != PermMask.getNumOperands(); ++i) {
1787 if (PermMask.getOperand(i).getOpcode() != ISD::UNDEF) {
1788 unsigned SrcElt = cast<ConstantSDNode > (PermMask.getOperand(i))->getZExtValue();
1791 if (SrcElt >= V2EltIdx0) {
1792 if (1 >= (++EltsFromV2)) {
1793 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1795 } else if (CurrElt != SrcElt) {
// (monotonic = false — elided)
1803 if (PrevElt > 0 && SrcElt < MaxElts) {
1804 if ((PrevElt == SrcElt - 1)
1805 || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
1812 } else if (PrevElt == 0) {
1813 // First time through, need to keep track of previous element
1816 // This isn't a rotation, takes elements from vector 2
1823 if (EltsFromV2 == 1 && monotonic) {
1824 // Compute mask and shuffle
1825 MachineFunction &MF = DAG.getMachineFunction();
1826 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1827 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1828 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1829 // Initialize temporary register to 0
1830 SDValue InitTempReg =
1831 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1832 // Copy register's contents as index in SHUFFLE_MASK:
1833 SDValue ShufMaskOp =
1834 DAG.getNode(SPUISD::SHUFFLE_MASK, MVT::v4i32,
1835 DAG.getTargetConstant(V2Elt, MVT::i32),
1836 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1837 // Use shuffle mask in SHUFB synthetic instruction:
// Note the operand order: V2 first, then V1 — the mask inserts the single
// V2 element into V1.
1838 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1839 } else if (rotate) {
// Pure rotation: express it as a byte-rotate-left of V1.
1840 int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
1842 return DAG.getNode(SPUISD::ROTBYTES_LEFT, V1.getValueType(),
1843 V1, DAG.getConstant(rotamt, MVT::i16));
1845 // Convert the SHUFFLE_VECTOR mask's input element units to the
1847 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1849 SmallVector<SDValue, 16> ResultMask;
// General case: expand the element-wise mask into a v16i8 byte mask and
// emit a full SHUFB.
1850 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1852 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1855 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1857 for (unsigned j = 0; j < BytesPerElement; ++j) {
1858 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1863 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1864 &ResultMask[0], ResultMask.size());
1865 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
// Lowers SCALAR_TO_VECTOR: a constant scalar becomes a BUILD_VECTOR that
// replicates the constant across every lane (later simplified to a vector
// register load); any other scalar is moved into the preferred slot of a
// vector register via SPUISD::PREFSLOT2VEC.  (Case labels of the second
// switch are elided in this extract.)
1869 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1870 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1872 if (Op0.getNode()->getOpcode() == ISD::Constant) {
1873 // For a constant, build the appropriate constant vector, which will
1874 // eventually simplify to a vector register load.
1876 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1877 SmallVector<SDValue, 16> ConstVecValues;
1881 // Create a constant vector:
1882 switch (Op.getValueType().getSimpleVT()) {
1883 default: assert(0 && "Unexpected constant value type in "
1884 "LowerSCALAR_TO_VECTOR");
1885 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1886 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1887 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1888 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1889 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1890 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
// Replicate the scalar constant once per lane.
1893 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1894 for (size_t j = 0; j < n_copies; ++j)
1895 ConstVecValues.push_back(CValue);
1897 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1898 &ConstVecValues[0], ConstVecValues.size());
1900 // Otherwise, copy the value from one register to another:
1901 switch (Op0.getValueType().getSimpleVT()) {
1902 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1909 return DAG.getNode(SPUISD::PREFSLOT2VEC, Op.getValueType(), Op0, Op0);
1916 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
1917 MVT VT = Op.getValueType();
1918 SDValue N = Op.getOperand(0);
1919 SDValue Elt = Op.getOperand(1);
1922 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
1923 // Constant argument:
1924 int EltNo = (int) C->getZExtValue();
1927 if (VT == MVT::i8 && EltNo >= 16)
1928 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
1929 else if (VT == MVT::i16 && EltNo >= 8)
1930 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
1931 else if (VT == MVT::i32 && EltNo >= 4)
1932 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
1933 else if (VT == MVT::i64 && EltNo >= 2)
1934 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
1936 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
1937 // i32 and i64: Element 0 is the preferred slot
1938 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, N);
1941 // Need to generate shuffle mask and extract:
1942 int prefslot_begin = -1, prefslot_end = -1;
1943 int elt_byte = EltNo * VT.getSizeInBits() / 8;
1945 switch (VT.getSimpleVT()) {
1947 assert(false && "Invalid value type!");
1949 prefslot_begin = prefslot_end = 3;
1953 prefslot_begin = 2; prefslot_end = 3;
1958 prefslot_begin = 0; prefslot_end = 3;
1963 prefslot_begin = 0; prefslot_end = 7;
1968 assert(prefslot_begin != -1 && prefslot_end != -1 &&
1969 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
1971 unsigned int ShufBytes[16];
1972 for (int i = 0; i < 16; ++i) {
1973 // zero fill uppper part of preferred slot, don't care about the
1975 unsigned int mask_val;
1976 if (i <= prefslot_end) {
1978 ((i < prefslot_begin)
1980 : elt_byte + (i - prefslot_begin));
1982 ShufBytes[i] = mask_val;
1984 ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
1987 SDValue ShufMask[4];
1988 for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
1989 unsigned bidx = i * 4;
1990 unsigned int bits = ((ShufBytes[bidx] << 24) |
1991 (ShufBytes[bidx+1] << 16) |
1992 (ShufBytes[bidx+2] << 8) |
1994 ShufMask[i] = DAG.getConstant(bits, MVT::i32);
1997 SDValue ShufMaskVec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1999 sizeof(ShufMask) / sizeof(ShufMask[0]));
2001 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2002 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2003 N, N, ShufMaskVec));
2005 // Variable index: Rotate the requested element into slot 0, then replicate
2006 // slot 0 across the vector
2007 MVT VecVT = N.getValueType();
2008 if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
2009 cerr << "LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit vector type!\n";
2013 // Make life easier by making sure the index is zero-extended to i32
2014 if (Elt.getValueType() != MVT::i32)
2015 Elt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Elt);
2017 // Scale the index to a bit/byte shift quantity
2019 APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
2020 unsigned scaleShift = scaleFactor.logBase2();
2023 if (scaleShift > 0) {
2024 // Scale the shift factor:
2025 Elt = DAG.getNode(ISD::SHL, MVT::i32, Elt,
2026 DAG.getConstant(scaleShift, MVT::i32));
2029 vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT, N, Elt);
2031 // Replicate the bytes starting at byte 0 across the entire vector (for
2032 // consistency with the notion of a unified register set)
2035 switch (VT.getSimpleVT()) {
2037 cerr << "LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector type\n";
2041 SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
2042 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2047 SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2048 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2054 SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2055 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2061 SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2062 SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2063 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, loFactor, hiFactor,
2064 loFactor, hiFactor);
2069 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2070 DAG.getNode(SPUISD::SHUFB, VecVT,
2071 vecShift, vecShift, replicate));
//! Lower ISD::INSERT_VECTOR_ELT for Cell SPU.
//
// Only constant insertion indices are supported (enforced by the cast and
// the assert). The element is inserted by generating a SHUFFLE_MASK keyed
// off a stack-pointer-relative address (used purely for its alignment and
// offset bits), then SHUFB-merging the scalar-to-vector value with the
// original vector.
2077 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2078 SDValue VecOp = Op.getOperand(0);
2079 SDValue ValOp = Op.getOperand(1);
2080 SDValue IdxOp = Op.getOperand(2);
2081 MVT VT = Op.getValueType();
// NOTE: cast<> already asserts on a non-constant index; the explicit assert
// below documents the constraint for builds where cast<> is unchecked.
2083 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2084 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2086 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2087 // Use $sp ($1) because it's always 16-byte aligned and it's available:
2088 SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
2089 DAG.getRegister(SPU::R1, PtrVT),
2090 DAG.getConstant(CN->getSExtValue(), PtrVT));
2091 SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, VT, Pointer);
2094 DAG.getNode(SPUISD::SHUFB, VT,
2095 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2097 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, ShufMask));
//! Lower i8 arithmetic that the SPU has no native support for.
//
// Strategy throughout: promote the i8 operands to i16 (sign- or
// zero-extended as appropriate for the operation), perform the operation
// at i16, and truncate the result back to i8. Shift/rotate amounts are
// normalized to the target's shift-amount type. Constant operands are
// re-materialized directly at the wider type instead of being extended.
2102 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
2103 const TargetLowering &TLI)
2105 SDValue N0 = Op.getOperand(0); // Everything has at least one operand
2106 MVT ShiftVT = TLI.getShiftAmountTy();
2108 assert(Op.getValueType() == MVT::i8);
2111 assert(0 && "Unhandled i8 math operator");
2115 // 8-bit addition: Promote the arguments up to 16-bits and truncate
2117 SDValue N1 = Op.getOperand(1);
2118 N0 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0);
2119 N1 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1);
2120 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2121 DAG.getNode(Opc, MVT::i16, N0, N1));
2126 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2128 SDValue N1 = Op.getOperand(1);
2129 N0 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0);
2130 N1 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1);
2131 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2132 DAG.getNode(Opc, MVT::i16, N0, N1));
// NOTE(review): the byte is replicated into both halves of the i16
// (N0 | (N0 << 8)) before the operation — this makes a 16-bit rotate act
// like an 8-bit rotate. The case label is not visible in this excerpt;
// presumably this is the ROTL/ROTR case — confirm against the full file.
2136 SDValue N1 = Op.getOperand(1);
2138 N0 = (N0.getOpcode() != ISD::Constant
2139 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2140 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2142 N1Opc = N1.getValueType().bitsLT(ShiftVT)
2145 N1 = (N1.getOpcode() != ISD::Constant
2146 ? DAG.getNode(N1Opc, ShiftVT, N1)
2147 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2148 TLI.getShiftAmountTy()));
2150 DAG.getNode(ISD::OR, MVT::i16, N0,
2151 DAG.getNode(ISD::SHL, MVT::i16,
2152 N0, DAG.getConstant(8, MVT::i32)));
2153 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2154 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
// Logical-shift style case: zero-extend the value operand, normalize the
// shift amount to ShiftVT.
2158 SDValue N1 = Op.getOperand(1);
2160 N0 = (N0.getOpcode() != ISD::Constant
2161 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2162 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2164 N1Opc = N1.getValueType().bitsLT(ShiftVT)
2167 N1 = (N1.getOpcode() != ISD::Constant
2168 ? DAG.getNode(N1Opc, ShiftVT, N1)
2169 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(), ShiftVT));
2170 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2171 DAG.getNode(Opc, MVT::i16, N0, N1));
// Arithmetic-shift style case: sign-extend the value operand so the sign
// bit propagates through the widened operation.
2174 SDValue N1 = Op.getOperand(1);
2176 N0 = (N0.getOpcode() != ISD::Constant
2177 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2178 : DAG.getConstant(cast<ConstantSDNode>(N0)->getSExtValue(),
2180 N1Opc = N1.getValueType().bitsLT(ShiftVT)
2183 N1 = (N1.getOpcode() != ISD::Constant
2184 ? DAG.getNode(N1Opc, ShiftVT, N1)
2185 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2187 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2188 DAG.getNode(Opc, MVT::i16, N0, N1));
// Signed two-operand case (both operands sign-extended to i16).
// NOTE(review): N0 uses getZExtValue() while N1 uses getSExtValue() for the
// constant path here, the reverse of the pattern above — verify this
// asymmetry against the full file; it may be intentional or a latent bug.
2191 SDValue N1 = Op.getOperand(1);
2193 N0 = (N0.getOpcode() != ISD::Constant
2194 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2195 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2197 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2198 N1 = (N1.getOpcode() != ISD::Constant
2199 ? DAG.getNode(N1Opc, MVT::i16, N1)
2200 : DAG.getConstant(cast<ConstantSDNode>(N1)->getSExtValue(),
2202 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2203 DAG.getNode(Opc, MVT::i16, N0, N1));
2211 //! Generate the carry-generate shuffle mask.
//
// Returns a v4i32 BUILD_VECTOR shuffle-control constant. Bytes 0x04-0x07
// and 0x0c-0x0f select the odd words of the source; 0x80 mask bytes
// produce zero in the SHUFB result.
2212 SDValue SPU::getCarryGenerateShufMask(SelectionDAG &DAG) {
2213 SmallVector<SDValue, 16 > ShufBytes;
2215 // Create the shuffle mask for "rotating" the carry up one register slot
2216 // once the carry is generated.
2217 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2218 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2219 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2220 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2222 return DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2223 &ShufBytes[0], ShufBytes.size());
2226 //! Generate the borrow-generate shuffle mask
//
// Same word-selection pattern as the carry mask above, but the filler
// bytes are 0xc0 instead of 0x80; in SHUFB, 0xc0-style mask bytes yield
// 0xff rather than zero, which is what borrow propagation needs.
2227 SDValue SPU::getBorrowGenerateShufMask(SelectionDAG &DAG) {
2228 SmallVector<SDValue, 16 > ShufBytes;
2230 // Create the shuffle mask for "rotating" the borrow up one register slot
2231 // once the borrow is generated.
2232 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2233 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2234 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2235 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2237 return DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2238 &ShufBytes[0], ShufBytes.size());
2241 //! Lower byte immediate operations for v16i8 vectors:
//
// Detects AND/OR/XOR of a v16i8 vector with a splatted constant vector and
// rewrites the constant side as a BUILD_VECTOR of target constants so the
// instruction selector can emit the byte-immediate forms (ANDBI/ORBI/XORBI).
// Looks through BIT_CONVERT and tolerates the constant appearing as either
// operand. If no splat constant is found, the operation is returned
// unchanged (it is still legal).
2243 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2246 MVT VT = Op.getValueType();
// Assume the constant vector is operand 0; swap if it turns out to be
// operand 1 below.
2248 ConstVec = Op.getOperand(0);
2249 Arg = Op.getOperand(1);
2250 if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2251 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2252 ConstVec = ConstVec.getOperand(0);
2254 ConstVec = Op.getOperand(1);
2255 Arg = Op.getOperand(0);
2256 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2257 ConstVec = ConstVec.getOperand(0);
2262 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2263 uint64_t VectorBits[2];
2264 uint64_t UndefBits[2];
2265 uint64_t SplatBits, SplatUndef;
// Only rewrite when the build-vector is a fully-constant splat.
2268 if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
2269 && isConstantSplat(VectorBits, UndefBits,
2270 VT.getVectorElementType().getSizeInBits(),
2271 SplatBits, SplatUndef, SplatSize)) {
// Only the low 8 bits of the splat value are meaningful for a byte
// immediate.
2273 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2274 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2276 // Turn the BUILD_VECTOR into a set of target constants:
2277 for (size_t i = 0; i < tcVecSize; ++i)
2280 return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
2281 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2284 // These operations (AND, OR, XOR) are legal, they just couldn't be custom
2285 // lowered. Return the operation, rather than a null SDValue.
2289 //! Custom lowering for CTPOP (count population)
2291 Custom lowering code that counts the number ones in the input
2292 operand. SPU has such an instruction, but it counts the number of
2293 ones per byte, which then have to be accumulated.
//
// Per width:
//  - i8:  a single CNTB on the promoted vector suffices.
//  - i16: CNTB, then fold the two byte counts together with a shift/add
//         and mask to the low nibble-range (0x0f covers max count 16).
//  - i32: CNTB, then two shift/add reduction steps (16 then 8) and mask
//         with 0xff (max count 32 fits in a byte).
// Virtual registers are used as explicit copy points so the partial sums
// can be reused by multiple SRL/ADD consumers.
2295 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2296 MVT VT = Op.getValueType();
2297 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2299 switch (VT.getSimpleVT()) {
2301 assert(false && "Invalid value type!");
// i8 case: CNTB counts ones per byte, which IS the answer for one byte.
2303 SDValue N = Op.getOperand(0);
2304 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2306 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
2307 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2309 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
// i16 case:
2313 MachineFunction &MF = DAG.getMachineFunction();
2314 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2316 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2318 SDValue N = Op.getOperand(0);
2319 SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2320 SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2321 SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2323 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
2324 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2326 // CNTB_result becomes the chain to which all of the virtual registers
2327 // CNTB_reg, SUM1_reg become associated:
2328 SDValue CNTB_result =
2329 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2331 SDValue CNTB_rescopy =
2332 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2334 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
// result = (count + (count >> 8)) & 0x0f
2336 return DAG.getNode(ISD::AND, MVT::i16,
2337 DAG.getNode(ISD::ADD, MVT::i16,
2338 DAG.getNode(ISD::SRL, MVT::i16,
// i32 case:
2345 MachineFunction &MF = DAG.getMachineFunction();
2346 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2348 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2349 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2351 SDValue N = Op.getOperand(0);
2352 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2353 SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2354 SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2355 SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2357 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
2358 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2360 // CNTB_result becomes the chain to which all of the virtual registers
2361 // CNTB_reg, SUM1_reg become associated:
2362 SDValue CNTB_result =
2363 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2365 SDValue CNTB_rescopy =
2366 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
// First reduction: add the upper halfword's counts into the lower.
2369 DAG.getNode(ISD::SRL, MVT::i32,
2370 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2373 DAG.getNode(ISD::ADD, MVT::i32,
2374 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2376 SDValue Sum1_rescopy =
2377 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
// Second reduction: add the remaining byte's count, then mask to 0xff.
2380 DAG.getNode(ISD::SRL, MVT::i32,
2381 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2384 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2385 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2387 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2397 //! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
2399 f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
2400 All conversions to i64 are expanded to a libcall.
//
// The runtime-library call is selected via RTLIB::getFPTOSINT/getFPTOUINT
// from the (source FP type, destination int type) pair.
2402 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
2403 SPUTargetLowering &TLI) {
2404 MVT OpVT = Op.getValueType();
2405 SDValue Op0 = Op.getOperand(0);
2406 MVT Op0VT = Op0.getValueType();
// Libcall needed for f64->i32 and for any conversion producing i64.
2408 if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
2409 || OpVT == MVT::i64) {
2410 // Convert f32 / f64 to i32 / i64 via libcall.
2412 (Op.getOpcode() == ISD::FP_TO_SINT)
2413 ? RTLIB::getFPTOSINT(Op0VT, OpVT)
2414 : RTLIB::getFPTOUINT(Op0VT, OpVT);
2415 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd fp-to-int conversion!");
2417 return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2423 //! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
2425 i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
2426 All conversions from i64 are expanded to a libcall.
//
// Mirror image of LowerFP_TO_INT above: the runtime-library call is
// selected via RTLIB::getSINTTOFP/getUINTTOFP.
2428 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
2429 SPUTargetLowering &TLI) {
2430 MVT OpVT = Op.getValueType();
2431 SDValue Op0 = Op.getOperand(0);
2432 MVT Op0VT = Op0.getValueType();
// Libcall needed for i32->f64 and for any conversion consuming i64.
2434 if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
2435 || Op0VT == MVT::i64) {
2436 // Convert i32, i64 to f64 via libcall:
2438 (Op.getOpcode() == ISD::SINT_TO_FP)
2439 ? RTLIB::getSINTTOFP(Op0VT, OpVT)
2440 : RTLIB::getUINTTOFP(Op0VT, OpVT);
2441 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd int-to-fp conversion!");
2443 return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2449 //! Lower ISD::SETCC
2451 This handles MVT::f64 (double floating point) condition lowering
//
// The f64 compare is performed with integer arithmetic: the double is
// bitcast to i64, the sign-magnitude encoding is converted to two's
// complement (negative values become signMag2TC - value, selected by the
// sign bit), and an integer SETCC of the mapped opcode is emitted. SETO
// and SETUO are handled separately via NaN tests (self-compare against
// 0.0 / exponent-field checks). Ordered comparisons additionally AND in
// "neither operand is NaN".
2453 static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
2454 const TargetLowering &TLI) {
2455 CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
2456 assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
2458 SDValue lhs = Op.getOperand(0);
2459 SDValue rhs = Op.getOperand(1);
2460 MVT lhsVT = lhs.getValueType();
2461 assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::64\n");
2463 MVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
2464 APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2465 MVT IntVT(MVT::i64);
2467 // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
2468 // selected to a NOP:
2469 SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, IntVT, lhs);
2471 DAG.getNode(ISD::TRUNCATE, MVT::i32,
2472 DAG.getNode(ISD::SRL, IntVT,
2473 i64lhs, DAG.getConstant(32, MVT::i32)));
// High 32 bits with the sign bit cleared — used for NaN/exponent tests.
2474 SDValue lhsHi32abs =
2475 DAG.getNode(ISD::AND, MVT::i32,
2476 lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
2478 DAG.getNode(ISD::TRUNCATE, MVT::i32, i64lhs);
2480 // SETO and SETUO only use the lhs operand:
2481 if (CC->get() == ISD::SETO) {
2482 // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
2484 APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
// x == x is false exactly when x is NaN; XOR with all-ones inverts.
2485 return DAG.getNode(ISD::XOR, ccResultVT,
2486 DAG.getSetCC(ccResultVT,
2487 lhs, DAG.getConstantFP(0.0, lhsVT),
2489 DAG.getConstant(ccResultAllOnes, ccResultVT));
2490 } else if (CC->get() == ISD::SETUO) {
2491 // Evaluates to true if Op0 is [SQ]NaN
// NaN <=> exponent all ones (hi32abs > 0x7ff00000, or == with nonzero
// mantissa bits); the two sub-tests are combined with AND.
2492 return DAG.getNode(ISD::AND, ccResultVT,
2493 DAG.getSetCC(ccResultVT,
2495 DAG.getConstant(0x7ff00000, MVT::i32),
2497 DAG.getSetCC(ccResultVT,
2499 DAG.getConstant(0, MVT::i32),
2503 SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, IntVT, rhs);
2505 DAG.getNode(ISD::TRUNCATE, MVT::i32,
2506 DAG.getNode(ISD::SRL, IntVT,
2507 i64rhs, DAG.getConstant(32, MVT::i32)));
2509 // If a value is negative, subtract from the sign magnitude constant:
2510 SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
2512 // Convert the sign-magnitude representation into 2's complement:
// SRA of the sign bit yields an all-ones/all-zeros select mask.
2513 SDValue lhsSelectMask = DAG.getNode(ISD::SRA, ccResultVT,
2514 lhsHi32, DAG.getConstant(31, MVT::i32));
2515 SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, IntVT, signMag2TC, i64lhs);
2517 DAG.getNode(ISD::SELECT, IntVT,
2518 lhsSelectMask, lhsSignMag2TC, i64lhs);
2520 SDValue rhsSelectMask = DAG.getNode(ISD::SRA, ccResultVT,
2521 rhsHi32, DAG.getConstant(31, MVT::i32));
2522 SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, IntVT, signMag2TC, i64rhs);
2524 DAG.getNode(ISD::SELECT, IntVT,
2525 rhsSelectMask, rhsSignMag2TC, i64rhs);
// Map the floating-point condition to the corresponding signed integer
// comparison on the two's-complement-mapped values.
2529 switch (CC->get()) {
2532 compareOp = ISD::SETEQ; break;
2535 compareOp = ISD::SETGT; break;
2538 compareOp = ISD::SETGE; break;
2541 compareOp = ISD::SETLT; break;
2544 compareOp = ISD::SETLE; break;
2547 compareOp = ISD::SETNE; break;
2549 cerr << "CellSPU ISel Select: unimplemented f64 condition\n";
2555 DAG.getSetCC(ccResultVT, lhsSelect, rhsSelect, (ISD::CondCode) compareOp);
// NOTE(review): bit 3 of the ISD::CondCode enum distinguishes unordered
// from ordered comparisons — confirm against ISD::CondCode's encoding.
2557 if ((CC->get() & 0x8) == 0) {
2558 // Ordered comparison:
2559 SDValue lhsNaN = DAG.getSetCC(ccResultVT,
2560 lhs, DAG.getConstantFP(0.0, MVT::f64),
2562 SDValue rhsNaN = DAG.getSetCC(ccResultVT,
2563 rhs, DAG.getConstantFP(0.0, MVT::f64),
2565 SDValue ordered = DAG.getNode(ISD::AND, ccResultVT, lhsNaN, rhsNaN);
2567 result = DAG.getNode(ISD::AND, ccResultVT, ordered, result);
2573 //! Lower ISD::SELECT_CC
2575 ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
2578 \note Need to revisit this in the future: if the code path through the true
2579 and false value computations is longer than the latency of a branch (6
2580 cycles), then it would be more advantageous to branch and insert a new basic
2581 block and branch on the condition. However, this code does not make that
2582 assumption, given the simplistic uses so far.
2585 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
2586 const TargetLowering &TLI) {
2587 MVT VT = Op.getValueType();
2588 SDValue lhs = Op.getOperand(0);
2589 SDValue rhs = Op.getOperand(1);
2590 SDValue trueval = Op.getOperand(2);
2591 SDValue falseval = Op.getOperand(3);
2592 SDValue condition = Op.getOperand(4);
2594 // NOTE: SELB's arguments: $rA, $rB, $mask
2596 // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
2597 // where bits in $mask are 1. CCond will be inverted, having 1s where the
2598 // condition was true and 0s where the condition was false. Hence, the
2599 // arguments to SELB get reversed.
2601 // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2602 // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2603 // with another "cannot select select_cc" assert:
// Emit an explicit SETCC to compute the mask, then a SELB with
// (falseval, trueval) — reversed per the note above.
2605 SDValue compare = DAG.getNode(ISD::SETCC,
2606 TLI.getSetCCResultType(Op.getValueType()),
2607 lhs, rhs, condition);
2608 return DAG.getNode(SPUISD::SELB, VT, falseval, trueval, compare);
2611 //! Custom lower ISD::TRUNCATE
//
// Only the i128 -> i64 truncation is custom-lowered: the low doubleword
// (bytes 8-15) is selected with a SHUFB and moved to the preferred slot.
// All other truncations are returned as an empty SDValue, leaving them to
// default legalization.
2612 static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
2614 MVT VT = Op.getValueType();
2615 MVT::SimpleValueType simpleVT = VT.getSimpleVT();
2616 MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2618 SDValue Op0 = Op.getOperand(0);
2619 MVT Op0VT = Op0.getValueType();
2620 MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2622 if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
2623 // Create shuffle mask, least significant doubleword of quadword
2624 unsigned maskHigh = 0x08090a0b;
2625 unsigned maskLow = 0x0c0d0e0f;
2626 // Use a shuffle to perform the truncation
2627 SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2628 DAG.getConstant(maskHigh, MVT::i32),
2629 DAG.getConstant(maskLow, MVT::i32),
2630 DAG.getConstant(maskHigh, MVT::i32),
2631 DAG.getConstant(maskLow, MVT::i32));
2634 SDValue PromoteScalar = DAG.getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0);
2636 SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT,
2637 PromoteScalar, PromoteScalar, shufMask);
2639 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2640 DAG.getNode(ISD::BIT_CONVERT, VecVT, truncShuffle));
2643 return SDValue(); // Leave the truncate unmolested
2646 //! Custom (target-specific) lowering entry point
2648 This is where LLVM's DAG selection process calls to do target-specific
//
// Dispatches each custom-marked opcode to its Lower* helper above. The
// default case reports the unhandled opcode and node to stderr for
// debugging before falling through.
2652 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2654 unsigned Opc = (unsigned) Op.getOpcode();
2655 MVT VT = Op.getValueType();
// Unhandled opcode: dump diagnostics.
2659 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2660 cerr << "Op.getOpcode() = " << Opc << "\n";
2661 cerr << "*Op.getNode():\n";
2662 Op.getNode()->dump();
// Memory operations:
2669 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2671 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2672 case ISD::ConstantPool:
2673 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2674 case ISD::GlobalAddress:
2675 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2676 case ISD::JumpTable:
2677 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2678 case ISD::ConstantFP:
2679 return LowerConstantFP(Op, DAG);
2680 case ISD::FORMAL_ARGUMENTS:
2681 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2683 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2685 return LowerRET(Op, DAG, getTargetMachine());
2687 // i8, i64 math ops:
2696 return LowerI8Math(Op, DAG, Opc, *this);
2700 case ISD::FP_TO_SINT:
2701 case ISD::FP_TO_UINT:
2702 return LowerFP_TO_INT(Op, DAG, *this);
2704 case ISD::SINT_TO_FP:
2705 case ISD::UINT_TO_FP:
2706 return LowerINT_TO_FP(Op, DAG, *this);
2708 // Vector-related lowering.
2709 case ISD::BUILD_VECTOR:
2710 return LowerBUILD_VECTOR(Op, DAG);
2711 case ISD::SCALAR_TO_VECTOR:
2712 return LowerSCALAR_TO_VECTOR(Op, DAG);
2713 case ISD::VECTOR_SHUFFLE:
2714 return LowerVECTOR_SHUFFLE(Op, DAG);
2715 case ISD::EXTRACT_VECTOR_ELT:
2716 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2717 case ISD::INSERT_VECTOR_ELT:
2718 return LowerINSERT_VECTOR_ELT(Op, DAG);
2720 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2724 return LowerByteImmed(Op, DAG);
2726 // Vector and i8 multiply:
2729 return LowerI8Math(Op, DAG, Opc, *this);
2732 return LowerCTPOP(Op, DAG);
2734 case ISD::SELECT_CC:
2735 return LowerSELECT_CC(Op, DAG, *this);
2738 return LowerSETCC(Op, DAG, *this);
2741 return LowerTRUNCATE(Op, DAG);
//! Replace results of illegal-typed nodes during type legalization.
//
// Currently only emits a diagnostic for unhandled opcodes; no results are
// replaced here (results are left unchanged for default expansion).
2747 void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
2748 SmallVectorImpl<SDValue>&Results,
2752 unsigned Opc = (unsigned) N->getOpcode();
2753 MVT OpVT = N->getValueType(0);
// Unhandled opcode: dump diagnostics.
2757 cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2758 cerr << "Op.getOpcode() = " << Opc << "\n";
2759 cerr << "*Op.getNode():\n";
2767 /* Otherwise, return unchanged */
2770 //===----------------------------------------------------------------------===//
2771 // Target Optimization Hooks
2772 //===----------------------------------------------------------------------===//
//! Target-specific DAG combines for Cell SPU.
//
// Folds performed here:
//  - ADD of an SPUindirect with a constant: fold zero offsets away and
//    merge two constant offsets into one SPUindirect.
//  - {SIGN,ZERO,ANY}_EXTEND of VEC2PREFSLOT with matching types: drop the
//    redundant extend.
//  - SPUindirect of (AFormAddr, 0) or of (add ..., 0): simplify the
//    addressing form.
//  - Degenerate (zero-amount) SPU vector shifts/rotates: eliminate.
//  - PREFSLOT2VEC of (extend (VEC2PREFSLOT x)) or of (VEC2PREFSLOT x):
//    collapse the round trip when types permit.
// Returns the replacement value in Result, or an empty SDValue to keep the
// node unchanged; replacements are traced under -debug.
2775 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2778 TargetMachine &TM = getTargetMachine();
2780 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2781 SelectionDAG &DAG = DCI.DAG;
2782 SDValue Op0 = N->getOperand(0); // everything has at least one operand
2783 MVT NodeVT = N->getValueType(0); // The node's value type
2784 MVT Op0VT = Op0.getValueType(); // The first operand's result
2785 SDValue Result; // Initially, empty result
2787 switch (N->getOpcode()) {
2790 SDValue Op1 = N->getOperand(1);
2792 if (Op0.getOpcode() == SPUISD::IndirectAddr
2793 || Op1.getOpcode() == SPUISD::IndirectAddr) {
2794 // Normalize the operands to reduce repeated code
2795 SDValue IndirectArg = Op0, AddArg = Op1;
2797 if (Op1.getOpcode() == SPUISD::IndirectAddr) {
2802 if (isa<ConstantSDNode>(AddArg)) {
2803 ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg);
2804 SDValue IndOp1 = IndirectArg.getOperand(1);
2806 if (CN0->isNullValue()) {
2807 // (add (SPUindirect <arg>, <arg>), 0) ->
2808 // (SPUindirect <arg>, <arg>)
2810 #if !defined(NDEBUG)
2811 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2813 << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
2814 << "With: (SPUindirect <arg>, <arg>)\n";
2819 } else if (isa<ConstantSDNode>(IndOp1)) {
2820 // (add (SPUindirect <arg>, <const>), <const>) ->
2821 // (SPUindirect <arg>, <const + const>)
2822 ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1);
2823 int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
2824 SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
2826 #if !defined(NDEBUG)
2827 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2829 << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
2830 << "), " << CN0->getSExtValue() << ")\n"
2831 << "With: (SPUindirect <arg>, "
2832 << combinedConst << ")\n";
2836 return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
2837 IndirectArg, combinedValue);
2843 case ISD::SIGN_EXTEND:
2844 case ISD::ZERO_EXTEND:
2845 case ISD::ANY_EXTEND: {
2846 if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
2847 // (any_extend (SPUextract_elt0 <arg>)) ->
2848 // (SPUextract_elt0 <arg>)
2849 // Types must match, however...
2850 #if !defined(NDEBUG)
2851 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2852 cerr << "\nReplace: ";
2855 Op0.getNode()->dump(&DAG);
2864 case SPUISD::IndirectAddr: {
2865 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2866 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
2867 if (CN != 0 && CN->getZExtValue() == 0) {
2868 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2869 // (SPUaform <addr>, 0)
2871 DEBUG(cerr << "Replace: ");
2872 DEBUG(N->dump(&DAG));
2873 DEBUG(cerr << "\nWith: ");
2874 DEBUG(Op0.getNode()->dump(&DAG));
2875 DEBUG(cerr << "\n");
2879 } else if (Op0.getOpcode() == ISD::ADD) {
2880 SDValue Op1 = N->getOperand(1);
2881 if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
2882 // (SPUindirect (add <arg>, <arg>), 0) ->
2883 // (SPUindirect <arg>, <arg>)
2884 if (CN1->isNullValue()) {
2886 #if !defined(NDEBUG)
2887 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2889 << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
2890 << "With: (SPUindirect <arg>, <arg>)\n";
2894 return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
2895 Op0.getOperand(0), Op0.getOperand(1));
2901 case SPUISD::SHLQUAD_L_BITS:
2902 case SPUISD::SHLQUAD_L_BYTES:
2903 case SPUISD::VEC_SHL:
2904 case SPUISD::VEC_SRL:
2905 case SPUISD::VEC_SRA:
2906 case SPUISD::ROTBYTES_LEFT: {
2907 SDValue Op1 = N->getOperand(1);
2909 // Kill degenerate vector shifts:
2910 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
2911 if (CN->isNullValue()) {
2917 case SPUISD::PREFSLOT2VEC: {
2918 switch (Op0.getOpcode()) {
2921 case ISD::ANY_EXTEND:
2922 case ISD::ZERO_EXTEND:
2923 case ISD::SIGN_EXTEND: {
2924 // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
2926 // but only if the SPUprefslot2vec and <arg> types match.
2927 SDValue Op00 = Op0.getOperand(0);
2928 if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
2929 SDValue Op000 = Op00.getOperand(0);
2930 if (Op000.getValueType() == NodeVT) {
2936 case SPUISD::VEC2PREFSLOT: {
2937 // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
2939 Result = Op0.getOperand(0);
2947 // Otherwise, return unchanged.
2949 if (Result.getNode()) {
2950 DEBUG(cerr << "\nReplace.SPU: ");
2951 DEBUG(N->dump(&DAG));
2952 DEBUG(cerr << "\nWith: ");
2953 DEBUG(Result.getNode()->dump(&DAG));
2954 DEBUG(cerr << "\n");
2961 //===----------------------------------------------------------------------===//
2962 // Inline Assembly Support
2963 //===----------------------------------------------------------------------===//
2965 /// getConstraintType - Given a constraint letter, return the type of
2966 /// constraint it is for this target.
//
// Single-letter register-class constraints fall through to C_RegisterClass;
// everything else defers to the TargetLowering default.
2967 SPUTargetLowering::ConstraintType
2968 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2969 if (ConstraintLetter.size() == 1) {
2970 switch (ConstraintLetter[0]) {
2977 return C_RegisterClass;
2980 return TargetLowering::getConstraintType(ConstraintLetter);
/// Map an inline-asm register constraint letter (plus the operand's value
/// type) to a concrete SPU register class. Unknown constraints defer to
/// the TargetLowering default.
2983 std::pair<unsigned, const TargetRegisterClass*>
2984 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2987 if (Constraint.size() == 1) {
2988 // GCC RS6000 Constraint Letters
2989 switch (Constraint[0]) {
// Integer constraints: 64-bit values get R64C, otherwise R32C.
2993 return std::make_pair(0U, SPU::R64CRegisterClass);
2994 return std::make_pair(0U, SPU::R32CRegisterClass);
// Floating-point constraints: choose by f32 vs f64.
2997 return std::make_pair(0U, SPU::R32FPRegisterClass);
2998 else if (VT == MVT::f64)
2999 return std::make_pair(0U, SPU::R64FPRegisterClass);
3002 return std::make_pair(0U, SPU::GPRCRegisterClass);
3006 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
3009 //! Compute used/known bits for a SPU operand
//
// For all the SPU-specific node kinds listed below, no known-bit
// information is reported (the cases fall through without setting
// KnownZero/KnownOne), leaving the generic analysis conservative.
3011 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
3015 const SelectionDAG &DAG,
3016 unsigned Depth ) const {
3018 const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
3020 switch (Op.getOpcode()) {
3022 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
3028 case SPUISD::PREFSLOT2VEC:
3029 case SPUISD::LDRESULT:
3030 case SPUISD::VEC2PREFSLOT:
3031 case SPUISD::SHLQUAD_L_BITS:
3032 case SPUISD::SHLQUAD_L_BYTES:
3033 case SPUISD::VEC_SHL:
3034 case SPUISD::VEC_SRL:
3035 case SPUISD::VEC_SRA:
3036 case SPUISD::VEC_ROTL:
3037 case SPUISD::VEC_ROTR:
3038 case SPUISD::ROTBYTES_LEFT:
3039 case SPUISD::SELECT_MASK:
/// Report the number of known sign bits for SPU-specific nodes.
//
// For small integer types (i8/i16/i32) the visible case reports the full
// bit width as sign bits; other types fall through to the surrounding
// (elided) handling.
3046 SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3047 unsigned Depth) const {
3048 switch (Op.getOpcode()) {
3053 MVT VT = Op.getValueType();
3055 if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
3058 return VT.getSizeInBits();
3063 // LowerAsmOperandForConstraint
//
// SPU has no special inline-asm operand handling yet; delegate entirely to
// the TargetLowering base-class implementation.
3065 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3066 char ConstraintLetter,
3068 std::vector<SDValue> &Ops,
3069 SelectionDAG &DAG) const {
3070 // Default, for the time being, to the base class handler
3071 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
3075 /// isLegalAddressImmediate - Return true if the integer value can be used
3076 /// as the offset of the target addressing mode.
3077 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3078 const Type *Ty) const {
3079 // SPU's addresses are 256K:
// Accepts the open interval (-2^18, 2^18 - 1); i.e. an 18-bit-ish signed
// displacement range.
3080 return (V > -(1 << 18) && V < (1 << 18) - 1);
3083 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3088 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3089 // The SPU target isn't yet aware of offsets.