//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//
14 #include "SPURegisterNames.h"
15 #include "SPUISelLowering.h"
16 #include "SPUTargetMachine.h"
17 #include "SPUFrameInfo.h"
18 #include "llvm/ADT/APInt.h"
19 #include "llvm/ADT/VectorExtras.h"
20 #include "llvm/CallingConv.h"
21 #include "llvm/CodeGen/CallingConvLower.h"
22 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/MachineFunction.h"
24 #include "llvm/CodeGen/MachineInstrBuilder.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/CodeGen/SelectionDAG.h"
27 #include "llvm/Constants.h"
28 #include "llvm/Function.h"
29 #include "llvm/Intrinsics.h"
30 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/MathExtras.h"
32 #include "llvm/Target/TargetOptions.h"
using namespace llvm;

// Used in getTargetNodeName() below
namespace {
  std::map<unsigned, const char *> node_names;

  //! MVT mapping to useful data for Cell SPU
  struct valtype_map_s {
    const MVT   valtype;
    const int   prefslot_byte;
  };

  const valtype_map_s valtype_map[] = {
    { MVT::i1,   3 },
    { MVT::i8,   3 },
    { MVT::i16,  2 },
    { MVT::i32,  0 },
    { MVT::f32,  0 },
    { MVT::i64,  0 },
    { MVT::f64,  0 },
    { MVT::i128, 0 }
  };

  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
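
  // For reference: the "preferred slot" is where the SPU ISA expects a scalar
  // to live within a 128-bit register. A 32-bit value occupies bytes 0-3, so
  // an i16 scalar sits in bytes 2-3 (prefslot_byte == 2) and an i8 scalar in
  // byte 3 (prefslot_byte == 3). For example, extracting an i16 from an
  // aligned 16-byte line means rotating the loaded quadword so the halfword
  // lands at byte offset 2 of the register. (The table values above follow
  // from this layout.)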
  const valtype_map_s *getValueTypeMapEntry(MVT VT) {
    const valtype_map_s *retval = 0;

    for (size_t i = 0; i < n_valtype_map; ++i) {
      if (valtype_map[i].valtype == VT) {
        retval = valtype_map + i;
        break;
      }
    }

#ifndef NDEBUG
    if (retval == 0) {
      cerr << "getValueTypeMapEntry returns NULL for "
           << VT.getMVTString()
           << "\n";
      abort();
    }
#endif

    return retval;
  }
  //! Expand a library call into an actual call DAG node
  /*!
   \note
   This code is taken from SelectionDAGLegalize, since it is not exposed as
   part of the LLVM SelectionDAG API.
   */
  SDValue
  ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
                bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) {
    // The input chain to this libcall is the entry node of the function.
    // Legalizing the call will automatically add the previous call to the
    // dependence.
    SDValue InChain = DAG.getEntryNode();

    TargetLowering::ArgListTy Args;
    TargetLowering::ArgListEntry Entry;
    for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
      MVT ArgVT = Op.getOperand(i).getValueType();
      const Type *ArgTy = ArgVT.getTypeForMVT();
      Entry.Node = Op.getOperand(i);
      Entry.Ty = ArgTy;
      Entry.isSExt = isSigned;
      Entry.isZExt = !isSigned;
      Args.push_back(Entry);
    }
    SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
                                           TLI.getPointerTy());

    // Splice the libcall in wherever FindInputOutputChains tells us to.
    const Type *RetTy = Op.getNode()->getValueType(0).getTypeForMVT();
    std::pair<SDValue, SDValue> CallInfo =
            TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
                            CallingConv::C, false, Callee, Args, DAG,
                            Op.getNode()->getDebugLoc());

    return CallInfo.first;
  }
}
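
// Usage sketch (an illustration, not a call site from this file): lowering a
// 64-bit FP remainder through ExpandLibCall might look like
//
//   SDValue Dummy;
//   SDValue Res = ExpandLibCall(RTLIB::REM_F64, Op, DAG,
//                               /*isSigned=*/false, Dummy, *this);
//
// where Op is the node being replaced; the helper builds the argument list
// from Op's operands and emits a CallingConv::C call to the RTLIB symbol.
// The particular libcall enum above is only an example.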
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM),
    SPUTM(TM)
{
  // Fold away setcc operations if possible.
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set RTLIB libcall names as used by SPU:
  setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");

  // Set up the SPU's register classes:
  addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);

  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::LOAD,   VT, Custom);
    setOperationAction(ISD::STORE,  VT, Custom);
    setLoadExtAction(ISD::EXTLOAD,  VT, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
      MVT StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType) sctype;

    setOperationAction(ISD::LOAD,  VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
      MVT StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);
  // Custom lower SELECT_CC for most cases, but expand by default
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i8,    Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i16,   Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32,   Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i64,   Custom);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);

  // SPU has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);

  // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
  // for f32 instead of a libcall)
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.

  // FIXME: Change from "expand" to appropriate type once ROTR is supported in
  //        the target's instruction patterns.
  setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i8,  Expand /*Custom*/);

  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8,  Custom);

  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);

  // Make these operations legal and handle them during instruction selection:
  setOperationAction(ISD::SHL, MVT::i64, Legal);
  setOperationAction(ISD::SRL, MVT::i64, Legal);
  setOperationAction(ISD::SRA, MVT::i64, Legal);
  // Custom lower i8 multiplications; i32 and i64 multiplies are legal
  setOperationAction(ISD::MUL, MVT::i8,  Custom);
  setOperationAction(ISD::MUL, MVT::i32, Legal);
  setOperationAction(ISD::MUL, MVT::i64, Legal);

  // Need to custom handle (some) common i8, i64 math ops
  setOperationAction(ISD::ADD, MVT::i8,  Custom);
  setOperationAction(ISD::ADD, MVT::i64, Legal);
  setOperationAction(ISD::SUB, MVT::i8,  Custom);
  setOperationAction(ISD::SUB, MVT::i64, Legal);

  // SPU does not have BSWAP, but it does support CTLZ for i32.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8,  Custom);
  setOperationAction(ISD::CTPOP, MVT::i16, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i64, Custom);

  setOperationAction(ISD::CTTZ , MVT::i32, Expand);
  setOperationAction(ISD::CTTZ , MVT::i64, Expand);

  setOperationAction(ISD::CTLZ , MVT::i32, Legal);
  // SPU has a version of select that implements (a&~c)|(b&c), just like
  // select ought to work:
  setOperationAction(ISD::SELECT, MVT::i8,  Legal);
  setOperationAction(ISD::SELECT, MVT::i16, Legal);
  setOperationAction(ISD::SELECT, MVT::i32, Legal);
  setOperationAction(ISD::SELECT, MVT::i64, Legal);

  setOperationAction(ISD::SETCC, MVT::i8,  Legal);
  setOperationAction(ISD::SETCC, MVT::i16, Legal);
  setOperationAction(ISD::SETCC, MVT::i32, Legal);
  setOperationAction(ISD::SETCC, MVT::i64, Legal);
  setOperationAction(ISD::SETCC, MVT::f64, Custom);

  // Custom lower i128 -> i64 truncates
  setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);

  // SPU has a legal FP -> signed INT instruction for f32, but f64 needs
  // to be expanded into a libcall, hence the custom lowering:
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall

  // SPU has [U|S]INT_TO_FP for i32->f32, but not for i32->f64 or i64->f64:
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::ConstantPool,  VT, Custom);
    setOperationAction(ISD::JumpTable,     VT, Custom);
  }

  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Expand);
  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  // "Odd size" vector classes that we're willing to support:
  addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);
  for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT VT = (MVT::SimpleValueType)i;

    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD, VT, Legal);
    setOperationAction(ISD::SUB, VT, Legal);
    // mul is marked legal here; the cases the hardware cannot do directly
    // are handled during instruction selection.
    setOperationAction(ISD::MUL, VT, Legal);

    setOperationAction(ISD::AND,    VT, Legal);
    setOperationAction(ISD::OR,     VT, Legal);
    setOperationAction(ISD::XOR,    VT, Legal);
    setOperationAction(ISD::LOAD,   VT, Legal);
    setOperationAction(ISD::SELECT, VT, Legal);
    setOperationAction(ISD::STORE,  VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
  }

  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR,  MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
  setShiftAmountType(MVT::i32);
  setBooleanContents(ZeroOrNegativeOneBooleanContent);

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  computeRegisterProperties();

  // Set pre-RA register scheduler default to BURR, which produces slightly
  // better code than the default (could also be TDRR, but TargetLowering.h
  // needs a mod to support that model):
  setSchedulingPreference(SchedulingForRegPressure);
}
const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
    node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
            "SPUISD::ROTBYTES_LEFT_BITS";
    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
    node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
    node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
}
//===----------------------------------------------------------------------===//
// Return the Cell SPU's SETCC result type
//===----------------------------------------------------------------------===//

MVT SPUTargetLowering::getSetCCResultType(MVT VT) const {
  // i8, i16 and i32 are valid SETCC result types
  return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ? VT : MVT::i32);
}

//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
//  LowerOperation implementation
//===----------------------------------------------------------------------===//
/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to rotate to extract the requested element.

 For extending loads, we also want to ensure that the following sequence is
 emitted, e.g. for MVT::f32 extending load to MVT::f64:

\verbatim
%1  v16i8,ch = load
%2  v16i8,ch = rotate %1
%3  v4f32,ch = bitconvert %2
%4  f32     = vec2prefslot %3
%5  f64     = fp_extend %4
\endverbatim
*/
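
// A worked example of the rotation (illustrative numbers, not from a real
// DAG): an aligned i32 load from offset 0x8 within its 16-byte line has
// rotamt = (0x8 & 0xf) - prefslot_byte = 8 - 0 = 8, so the quadword is
// rotated left by 8 bytes to bring the word into the preferred slot
// (bytes 0-3). For an i16 at offset 0x5, rotamt = 5 - 2 = 3 bytes.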
static SDValue
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDValue the_chain = LN->getChain();
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  MVT InVT = LN->getMemoryVT();
  MVT OutVT = Op.getValueType();
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDValue result;
    SDValue basePtr = LN->getBasePtr();
    SDValue rotate;

    if (alignment == 16) {
      ConstantSDNode *CN;

      // Special cases for a known aligned load to simplify the base pointer
      // and the rotation amount:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();
        int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);

        if (rotamt < 0)
          rotamt += 16;

        rotate = DAG.getConstant(rotamt, MVT::i16);

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        if ((offset & ~0xf) > 0) {
          basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
                                basePtr,
                                DAG.getConstant((offset & ~0xf), PtrVT));
        }
      } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
                 || (basePtr.getOpcode() == SPUISD::IndirectAddr
                     && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
                     && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
        // Plain aligned a-form address: rotate into preferred slot
        // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
        int64_t rotamt = -vtm->prefslot_byte;
        if (rotamt < 0)
          rotamt += 16;
        rotate = DAG.getConstant(rotamt, MVT::i16);
      } else {
        // Offset the rotate amount by the basePtr and the preferred slot
        // byte offset
        int64_t rotamt = -vtm->prefslot_byte;
        if (rotamt < 0)
          rotamt += 16;
        rotate = DAG.getNode(ISD::ADD, PtrVT,
                             basePtr,
                             DAG.getConstant(rotamt, PtrVT));
      }
    } else {
      // Unaligned load: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        SDValue Flag;

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
          the_chain = DAG.getCopyToReg(the_chain, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, VReg, PtrVT);
        } else {
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
        }
      } else {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }

      // Offset the rotate amount by the basePtr and the preferred slot
      // byte offset
      rotate = DAG.getNode(ISD::ADD, PtrVT,
                           basePtr,
                           DAG.getConstant(-vtm->prefslot_byte, PtrVT));
    }
    // Re-emit as a v16i8 vector load
    result = DAG.getLoad(MVT::v16i8, the_chain, basePtr,
                         LN->getSrcValue(), LN->getSrcValueOffset(),
                         LN->isVolatile(), 16);

    // Update the chain
    the_chain = result.getValue(1);

    // Rotate into the preferred slot:
    result = DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v16i8,
                         result.getValue(0), rotate);

    // Convert the loaded v16i8 vector to the appropriate vector type
    // specified by the operand:
    MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
    result = DAG.getNode(SPUISD::VEC2PREFSLOT, InVT,
                         DAG.getNode(ISD::BIT_CONVERT, vecVT, result));

    // Handle extending loads by extending the scalar result:
    if (ExtType == ISD::SEXTLOAD) {
      result = DAG.getNode(ISD::SIGN_EXTEND, OutVT, result);
    } else if (ExtType == ISD::ZEXTLOAD) {
      result = DAG.getNode(ISD::ZERO_EXTEND, OutVT, result);
    } else if (ExtType == ISD::EXTLOAD) {
      unsigned NewOpc = ISD::ANY_EXTEND;

      if (OutVT.isFloatingPoint())
        NewOpc = ISD::FP_EXTEND;

      result = DAG.getNode(NewOpc, OutVT, result);
    }

    SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
    SDValue retops[2] = {
      result,
      the_chain
    };

    result = DAG.getNode(SPUISD::LDRESULT, retvts,
                         retops, sizeof(retops) / sizeof(retops[0]));
    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) LN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}
/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
 */
static SDValue
LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDValue Value = SN->getValue();
  MVT VT = Value.getValueType();
  MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  unsigned alignment = SN->getAlignment();
  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    // The vector type we really want to load from the 16-byte chunk.
    MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
        stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));

    SDValue alignLoadVec;
    SDValue basePtr = SN->getBasePtr();
    SDValue the_chain = SN->getChain();
    SDValue insertEltOffs;

    if (alignment == 16) {
      ConstantSDNode *CN;

      // Special cases for a known aligned load to simplify the base pointer
      // and insertion byte:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
                                    basePtr,
                                    DAG.getConstant((offset & 0xf), PtrVT));

        if ((offset & ~0xf) > 0) {
          basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
                                basePtr,
                                DAG.getConstant((offset & ~0xf), PtrVT));
        }
      } else {
        // Otherwise, assume it's at byte 0 of basePtr
        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
                                    basePtr,
                                    DAG.getConstant(0, PtrVT));
      }
    } else {
      // Unaligned load: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        SDValue Flag;

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
          the_chain = DAG.getCopyToReg(the_chain, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, VReg, PtrVT);
        } else {
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
        }
      } else {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }

      // Insertion point is solely determined by basePtr's contents
      insertEltOffs = DAG.getNode(ISD::ADD, PtrVT,
                                  basePtr,
                                  DAG.getConstant(0, PtrVT));
    }

    // Re-emit as a v16i8 vector load
    alignLoadVec = DAG.getLoad(MVT::v16i8, the_chain, basePtr,
                               SN->getSrcValue(), SN->getSrcValueOffset(),
                               SN->isVolatile(), 16);

    // Update the chain
    the_chain = alignLoadVec.getValue(1);
    LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
    SDValue theValue = SN->getValue();
    SDValue result;

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }

    // If the base pointer is already a D-form address, then just create
    // a new D-form address with a slot offset and the original base pointer.
    // Otherwise generate a D-form address with the slot offset relative
    // to the stack pointer, which is always aligned.
#if !defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      cerr << "CellSPU LowerSTORE: basePtr = ";
      basePtr.getNode()->dump(&DAG);
      cerr << "\n";
    }
#endif

    SDValue insertEltOp =
            DAG.getNode(SPUISD::SHUFFLE_MASK, vecVT, insertEltOffs);
    SDValue vectorizeOp =
            DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue);

    result = DAG.getNode(SPUISD::SHUFB, vecVT,
                         vectorizeOp, alignLoadVec,
                         DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, insertEltOp));

    result = DAG.getStore(the_chain, result, basePtr,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->getAlignment());
#if 0 && !defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      const SDValue &currentRoot = DAG.getRoot();

      DAG.setRoot(result);
      cerr << "------- CellSPU:LowerStore result:\n";
      DAG.dump();
      cerr << "-------\n";
      DAG.setRoot(currentRoot);
    }
#endif

    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) SN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}
//! Generate the address of a constant pool entry.
static SDValue
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDValue with the constant pool address in it.
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
    }
  }

  assert(0 &&
         "LowerConstantPool: Relocation model other than static"
         " not supported.");
  return SDValue();
}

//! Alternate entry point for generating the address of a constant pool entry
SDValue
SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG,
                       const SPUTargetMachine &TM) {
  return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
}
static SDValue
LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
    }
  }

  assert(0 &&
         "LowerJumpTable: Relocation model other than static not supported.");
  return SDValue();
}
static SDValue
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  const TargetMachine &TM = DAG.getTarget();
  SDValue Zero = DAG.getConstant(0, PtrVT);

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
    }
  } else {
    cerr << "LowerGlobalAddress: Relocation model other than static not "
         << "supported.\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}
//! Custom lower double precision floating point constants
static SDValue
LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();

  if (VT == MVT::f64) {
    ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());

    assert((FP != 0) &&
           "LowerConstantFP: Node is not ConstantFPSDNode");

    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
    SDValue T = DAG.getConstant(dbits, MVT::i64);
    SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T);
    return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
                       DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, Tvec));
  }

  return SDValue();
}
static SDValue
LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
{
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  SmallVector<SDValue, 48> ArgValues;
  SDValue Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;

  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Add DAG nodes to load the arguments or copy them out of registers.
  for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
       ArgNo != e; ++ArgNo) {
    MVT ObjectVT = Op.getValue(ArgNo).getValueType();
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    SDValue ArgVal;

    if (ArgRegIdx < NumArgRegs) {
      const TargetRegisterClass *ArgRegClass;
      switch (ObjectVT.getSimpleVT()) {
      default: {
        cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
             << ObjectVT.getMVTString()
             << "\n";
        abort();
      }
      case MVT::i8:   ArgRegClass = &SPU::R8CRegClass;   break;
      case MVT::i16:  ArgRegClass = &SPU::R16CRegClass;  break;
      case MVT::i32:  ArgRegClass = &SPU::R32CRegClass;  break;
      case MVT::i64:  ArgRegClass = &SPU::R64CRegClass;  break;
      case MVT::i128: ArgRegClass = &SPU::GPRCRegClass;  break;
      case MVT::f32:  ArgRegClass = &SPU::R32FPRegClass; break;
      case MVT::f64:  ArgRegClass = &SPU::R64FPRegClass; break;
      case MVT::v2f64:
      case MVT::v4f32:
      case MVT::v2i64:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        ArgRegClass = &SPU::VECREGRegClass;
        break;
      }

      unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
      RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
      ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
      ++ArgRegIdx;
    } else {
      // We need to load the argument to a virtual register if we determined
      // above that we ran out of physical registers of the appropriate type
      // or we're forced to do vararg
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
      ArgOffset += StackSlotSize;
    }

    ArgValues.push_back(ArgVal);
    // Update the chain
    Root = ArgVal.getOperand(0);
  }
  // vararg handling:
  if (isVarArg) {
    // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
    // We will spill (79-3)+1 registers to the stack
    SmallVector<SDValue, 79-3+1> MemOps;

    // Create the frame slot
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
      SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
      SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
      SDValue Store = DAG.getStore(Root, ArgVal, FIN, NULL, 0);
      Root = Store.getOperand(0);
      MemOps.push_back(Store);

      // Increment address by stack slot size for the next stored argument
      ArgOffset += StackSlotSize;
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
                         &MemOps[0], MemOps.size());
  }

  ArgValues.push_back(Root);

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, Op.getNode()->getVTList(),
                     &ArgValues[0], ArgValues.size());
}
/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;  // Top 14 bits have to be sext of immediate.

  return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
}
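
// Worked example (illustrative values): Addr = 0x10008 is word aligned (low
// 2 bits zero) and survives the sign-extension round trip through 18 bits,
// so it is encodable and the returned immediate is Addr >> 2. An address
// like 0x40000 fails the (Addr << 14 >> 14) check and is rejected, since
// only 18 bits (16 after the implicit >> 2) fit in the immediate field.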
static SDValue
LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
  SDValue Chain = TheCall->getChain();
  SDValue Callee = TheCall->getCallee();
  unsigned NumOps = TheCall->getNumArgs();
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  // Handy pointer type
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area. According to the SPU ABI,
  // we minimally need space for [LR] and [SP]
  unsigned NumStackBytes = SPUFrameInfo::minStackSize();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDValue, 8> MemOpChains;
  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = TheCall->getArg(i);

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType().getSimpleVT()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
    case MVT::f32:
    case MVT::f64:
    case MVT::v2i64:
    case MVT::v2f64:
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      // Every supported argument type goes into the next argument register
      // when one is available, otherwise into the next 16-byte stack slot:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }
  // Update number of stack bytes actually used, insert a call sequence start
  NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
                                                            true));

  if (!MemOpChains.empty()) {
    // Adjust the stack pointer for the stack arguments.
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }
1150 SmallVector<SDValue, 8> Ops;
1151 unsigned CallOpc = SPUISD::CALL;
1153 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1154 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1155 // node so that legalize doesn't hack it.
1156 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1157 GlobalValue *GV = G->getGlobal();
1158 MVT CalleeVT = Callee.getValueType();
1159 SDValue Zero = DAG.getConstant(0, PtrVT);
1160 SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1162 if (!ST->usingLargeMem()) {
1163 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1164 // style calls, otherwise, external symbols are BRASL calls. This assumes
1165 // that declared/defined symbols are in the same compilation unit and can
1166 // be reached through PC-relative jumps.
1169 // This may be an unsafe assumption for JIT and really large compilation
1171 if (GV->isDeclaration()) {
1172 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1174 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1177 // "Large memory" mode: Turn all calls into indirect calls with a X-form
1179 Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
1181 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1182 MVT CalleeVT = Callee.getValueType();
1183 SDValue Zero = DAG.getConstant(0, PtrVT);
1184 SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
1185 Callee.getValueType());
1187 if (!ST->usingLargeMem()) {
1188 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, ExtSym, Zero);
1190 Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, ExtSym, Zero);
1192 } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1193 // If this is an absolute destination address that appears to be a legal
1194 // local store address, use the munged value.
1195 Callee = SDValue(Dest, 0);
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);
  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag),
                      &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  if (TheCall->getValueType(0) != MVT::Other)
    InFlag = Chain.getValue(1);
  SDValue ResultVals[3];
  unsigned NumResults = 0;

  // If the call has results, copy the values out of the ret val registers.
  switch (TheCall->getValueType(0).getSimpleVT()) {
  default: assert(0 && "Unexpected ret value!");
  case MVT::Other: break;
  case MVT::i32:
    if (TheCall->getValueType(1) == MVT::i32) {
      // An expanded i64 result comes back in the R4/R3 register pair:
      Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals[1] = Chain.getValue(0);
      NumResults = 2;
    } else {
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      NumResults = 1;
    }
    break;
  case MVT::i64:
  case MVT::i128:
  case MVT::f32:
  case MVT::f64:
  case MVT::v2f64:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
    // Everything else is a single-register return in R3:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  }
  // If the function returns void, just return the chain.
  if (NumResults == 0)
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  ResultVals[NumResults++] = Chain;
  SDValue Res = DAG.getMergeValues(ResultVals, NumResults);
  return Res.getValue(Op.getResNo());
}
static SDValue
LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, TM, RVLocs);
  CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }

  SDValue Chain = Op.getOperand(0);
  SDValue Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
    Flag = Chain.getValue(1);
  }

  if (Flag.getNode())
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
}
//===----------------------------------------------------------------------===//
// Vector related lowering:
//===----------------------------------------------------------------------===//
static ConstantSDNode *
getVecImm(SDNode *N) {
  SDValue OpVal(0, 0);

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.getNode() == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return 0;
  }

  if (OpVal.getNode() != 0) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      return CN;
    }
  }

  return 0; // All UNDEF: use implicit def.; not Constant node
}
/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (Value <= 0x3ffff)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}
/// get_vec_i16imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
      return DAG.getTargetConstant(Value, ValueType);
    }
  }

  return SDValue();
}
/// get_vec_i10imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 10-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (isS10Constant(Value))
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}
/// get_vec_i8imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 8-bit constant, and if so, return the
/// constant.
///
/// @note: The incoming vector is v16i8 because that's the only way we can load
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same value.
SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
                           MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getZExtValue();
    if (ValueType == MVT::i16
        && Value <= 0xffff                 /* truncated from uint64_t */
        && ((short) Value >> 8) == ((short) Value & 0xff))
      return DAG.getTargetConstant(Value & 0xff, ValueType);
    else if (ValueType == MVT::i8
             && (Value & 0xff) == Value)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}
/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant
SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
                             MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if ((ValueType == MVT::i32
         && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
      return DAG.getTargetConstant(Value >> 16, ValueType);
  }

  return SDValue();
}
/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
  }

  return SDValue();
}

/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    // Note: no truncating cast here; a 64-bit splat value must be
    // preserved in full.
    return DAG.getTargetConstant(CN->getZExtValue(), MVT::i64);
  }

  return SDValue();
}
// If this is a vector of constants or undefs, get the bits. A bit in
// UndefBits is set if the corresponding element of the vector is an
// ISD::UNDEF value. For undefs, the corresponding VectorBits values are
// zero. Return true if this is not an array of constants, false if it is.
static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
                                       uint64_t UndefBits[2]) {
  // Start with zero'd results.
  VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;

  unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
    SDValue OpVal = BV->getOperand(i);

    unsigned PartNo = i >= e/2;               // In the upper 64 bits?
    unsigned SlotNo = e/2 - (i & (e/2-1))-1;  // Which subpiece of the uint64_t.

    uint64_t EltBits = 0;
    if (OpVal.getOpcode() == ISD::UNDEF) {
      uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
      UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
      continue;
    } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
    } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
      const APFloat &apf = CN->getValueAPF();
      EltBits = (CN->getValueType(0) == MVT::f32
                 ? FloatToBits(apf.convertToFloat())
                 : DoubleToBits(apf.convertToDouble()));
    } else {
      // Nonconstant element.
      return true;
    }

    VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
  }

  //printf("%llx %llx %llx %llx\n",
  //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
  return false;
}
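
// Illustration of the packing (hypothetical v4i32 build_vector): e == 4, so
// elements 0-1 land in VectorBits[0] and elements 2-3 in VectorBits[1], with
// SlotNo selecting the high or low 32-bit subpiece of each uint64_t. A
// build_vector (0x11111111, 0x22222222, undef, 0x44444444) thus yields
// VectorBits = { 0x1111111122222222, 0x0000000044444444 } and
// UndefBits  = { 0, 0xffffffff00000000 }.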
/// If this is a splat (repetition) of a value across the whole vector, return
/// the smallest size that splats it. For example, "0x01010101010101..." is a
/// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
/// SplatSize = 1 byte.
static bool isConstantSplat(const uint64_t Bits128[2],
                            const uint64_t Undef128[2],
                            int MinSplatBits,
                            uint64_t &SplatBits, uint64_t &SplatUndef,
                            int &SplatSize) {
  // Don't let undefs prevent splats from matching. See if the top 64-bits are
  // the same as the lower 64-bits, ignoring undefs.
  uint64_t Bits64  = Bits128[0] | Bits128[1];
  uint64_t Undef64 = Undef128[0] & Undef128[1];
  uint32_t Bits32  = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
  uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
  uint16_t Bits16  = uint16_t(Bits32)  | uint16_t(Bits32 >> 16);
  uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);

  if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
    if (MinSplatBits < 64) {
      // Check that the top 32-bits are the same as the lower 32-bits, ignoring
      // undefs.
      if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
        if (MinSplatBits < 32) {
          // If the top 16-bits match the lower 16-bits, ignoring undefs, try
          // a smaller splat; otherwise we have an i32 splat.
          if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
            if (MinSplatBits < 16) {
              // If the top 8-bits match the lower 8-bits, ignoring undefs,
              // we have an 8-bit splat.
              if ((Bits16 & (uint16_t(~Undef16) >> 8))
                  == ((Bits16 >> 8) & ~Undef16)) {
                SplatBits  = uint8_t(Bits16)  | uint8_t(Bits16 >> 8);
                SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
                SplatSize = 1;
                return true;
              }
            } else {
              SplatBits = Bits16;
              SplatUndef = Undef16;
              SplatSize = 2;
              return true;
            }
          }
        } else {
          SplatBits = Bits32;
          SplatUndef = Undef32;
          SplatSize = 4;
          return true;
        }
      }
    } else {
      SplatBits = Bits128[0];
      SplatUndef = Undef128[0];
      SplatSize = 8;
      return true;
    }
  }

  return false;  // Can't be a splat if two pieces don't match.
}
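
// Worked example (illustrative): Bits128 = { 0x0101010101010101,
// 0x0101010101010101 } with no undefs collapses to Bits64 = Bits32 = Bits16 =
// the same repeated 0x01 pattern, every halving comparison matches, and with
// MinSplatBits <= 8 the function reports SplatBits = 0x01, SplatSize = 1. If
// the caller instead asks for MinSplatBits == 32, the recursion stops early
// and returns SplatBits = 0x01010101 with SplatSize = 4.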
//! Lower a BUILD_VECTOR instruction creatively:
static SDValue
LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  // If this is a vector of constants or undefs, get the bits. A bit in
  // UndefBits is set if the corresponding element of the vector is an
  // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
  // zero.
  uint64_t VectorBits[2];
  uint64_t UndefBits[2];
  uint64_t SplatBits, SplatUndef;
  int SplatSize;
  if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
      || !isConstantSplat(VectorBits, UndefBits,
                          VT.getVectorElementType().getSizeInBits(),
                          SplatBits, SplatUndef, SplatSize))
    return SDValue();   // Not a constant vector, not a splat.

  switch (VT.getSimpleVT()) {
  default:
    cerr << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = "
         << VT.getMVTString()
         << "\n";
    abort();
    /*NOTREACHED*/
  case MVT::v4f32: {
    uint32_t Value32 = uint32_t(SplatBits);
    assert(SplatSize == 4
           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(Value32, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
  }
  case MVT::v2f64: {
    uint64_t f64val = uint64_t(SplatBits);
    assert(SplatSize == 8
           && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(f64val, MVT::i64);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
  }
  case MVT::v16i8: {
    // 8-bit constants have to be expanded to 16-bits
    unsigned short Value16 = SplatBits | (SplatBits << 8);
    SDValue Ops[8];
    for (int i = 0; i < 8; ++i)
      Ops[i] = DAG.getConstant(Value16, MVT::i16);
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
  }
  case MVT::v8i16: {
    unsigned short Value16;
    if (SplatSize == 2)
      Value16 = (unsigned short) (SplatBits & 0xffff);
    else
      Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
    SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
    SDValue Ops[8];
    for (int i = 0; i < 8; ++i) Ops[i] = T;
    return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
  }
  case MVT::v4i32: {
    unsigned int Value = SplatBits;
    SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
    return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
  }
  case MVT::v2i32: {
    unsigned int Value = SplatBits;
    SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
    return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T);
  }
  case MVT::v2i64: {
    return SPU::LowerSplat_v2i64(VT, DAG, SplatBits);
  }
  }

  return SDValue();
}
//! Lower a v2i64 splat to a magic constant or a SHUFB of two v4i32 halves
SDValue
SPU::LowerSplat_v2i64(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal) {
  uint32_t upper = uint32_t(SplatVal >> 32);
  uint32_t lower = uint32_t(SplatVal);

  if (upper == lower) {
    // Magic constant that can be matched by IL, ILA, et. al.
    SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, OpVT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                   Val, Val, Val, Val));
  } else {
    SDValue LO32;
    SDValue HI32;
    SmallVector<SDValue, 16> ShufBytes;
    bool upper_special, lower_special;

    // NOTE: This code creates common-case shuffle masks that can be easily
    // detected as common expressions. It is not attempting to create highly
    // specialized masks to replace any and all 0's, 0xff's and 0x80's.

    // Detect if the upper or lower half is a special shuffle mask pattern:
    upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
    lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);

    // Create lower vector if not a special pattern
    if (!lower_special) {
      SDValue LO32C = DAG.getConstant(lower, MVT::i32);
      LO32 = DAG.getNode(ISD::BIT_CONVERT, OpVT,
                         DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                     LO32C, LO32C, LO32C, LO32C));
    }

    // Create upper vector if not a special pattern
    if (!upper_special) {
      SDValue HI32C = DAG.getConstant(upper, MVT::i32);
      HI32 = DAG.getNode(ISD::BIT_CONVERT, OpVT,
                         DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                     HI32C, HI32C, HI32C, HI32C));
    }

    // If either upper or lower are special, then the two input operands are
    // the same (basically, one of them is a "don't care")
    if (lower_special)
      LO32 = HI32;
    if (upper_special)
      HI32 = LO32;

    if (lower_special && upper_special) {
      // Unhappy situation... both upper and lower are special, so punt with
      // a target constant:
      SDValue Zero = DAG.getConstant(0, MVT::i32);
      HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
                                Zero, Zero);
    }

    for (int i = 0; i < 4; ++i) {
      uint64_t val = 0;
      for (int j = 0; j < 4; ++j) {
        bool process_upper, process_lower;
        val <<= 8;
        process_upper = (upper_special && (i & 1) == 0);
        process_lower = (lower_special && (i & 1) == 1);

        if (process_upper || process_lower) {
          if ((process_upper && upper == 0)
              || (process_lower && lower == 0))
            val |= 0x80;
          else if ((process_upper && upper == 0xffffffff)
                   || (process_lower && lower == 0xffffffff))
            val |= 0xc0;
          else if ((process_upper && upper == 0x80000000)
                   || (process_lower && lower == 0x80000000))
            val |= (j == 0 ? 0xe0 : 0x80);
        } else
          val |= i * 4 + j + ((i & 1) * 16);
      }

      ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
    }

    return DAG.getNode(SPUISD::SHUFB, OpVT, HI32, LO32,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                   &ShufBytes[0], ShufBytes.size()));
  }
}
/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
/// which the Cell can operate. The code inspects V3 to ascertain whether the
/// permutation vector, V3, is monotonically increasing with one "exception"
/// element, e.g., (0, 1, _, 3). If this is the case, then generate a
/// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant
/// pool. In either case, the net result is going to eventually invoke SHUFB
/// to permute/shuffle the bytes from V1 and V2.
///
/// SHUFFLE_MASK is eventually selected as one of the C*D instructions, which
/// generate the control word for byte/halfword/word insertion. This takes
/// care of a single element move from V2 into V1.
///
/// SPUISD::SHUFB is eventually selected as the Cell's <i>shufb</i> instruction.
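///
/// For example (illustrative only): for v4i32, the mask (0, 1, 6, 3) is
/// monotonic over V1's elements with a single exception (index 6, i.e.
/// element 2 of V2), so it takes the SHUFFLE_MASK/SHUFB path below instead of
/// spilling the mask to the constant pool.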
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  SDValue PermMask = Op.getOperand(2);

  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;

  // If we have a single element being moved from V1 to V2, this can be handled
  // using the C*[DX] compute mask instructions, but the vector elements have
  // to be monotonically increasing with one exception element.
  MVT VecVT = V1.getValueType();
  MVT EltVT = VecVT.getVectorElementType();
  unsigned EltsFromV2 = 0;
  unsigned V2Elt = 0;
  unsigned V2EltIdx0 = 0;
  unsigned CurrElt = 0;
  unsigned MaxElts = VecVT.getVectorNumElements();
  unsigned PrevElt = 0;
  unsigned V0Elt = 0;
  bool monotonic = true;
  bool rotate = true;

  if (EltVT == MVT::i8) {
    V2EltIdx0 = 16;
  } else if (EltVT == MVT::i16) {
    V2EltIdx0 = 8;
  } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
    V2EltIdx0 = 4;
  } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
    V2EltIdx0 = 2;
  } else
    assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");

  for (unsigned i = 0; i != PermMask.getNumOperands(); ++i) {
    if (PermMask.getOperand(i).getOpcode() != ISD::UNDEF) {
      unsigned SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();

      if (monotonic) {
        if (SrcElt >= V2EltIdx0) {
          if (1 >= (++EltsFromV2)) {
            V2Elt = (V2EltIdx0 - SrcElt) << 2;
          }
        } else if (CurrElt != SrcElt) {
          monotonic = false;
        }

        ++CurrElt;
      }

      if (rotate) {
        if (PrevElt > 0 && SrcElt < MaxElts) {
          if ((PrevElt == SrcElt - 1)
              || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
            PrevElt = SrcElt;
            if (SrcElt == 0)
              V0Elt = i;
          } else {
            rotate = false;
          }
        } else if (PrevElt == 0) {
          // First time through, need to keep track of previous element
          PrevElt = SrcElt;
        } else {
          // This isn't a rotation; it takes elements from vector 2
          rotate = false;
        }
      }
    }
  }

  if (EltsFromV2 == 1 && monotonic) {
    // Compute mask and shuffle
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
    MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    // Initialize temporary register to 0
    SDValue InitTempReg =
      DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
    // Copy register's contents as index in SHUFFLE_MASK:
    SDValue ShufMaskOp =
      DAG.getNode(SPUISD::SHUFFLE_MASK, MVT::v4i32,
                  DAG.getTargetConstant(V2Elt, MVT::i32),
                  DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
    // Use shuffle mask in SHUFB synthetic instruction:
    return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
  } else if (rotate) {
    int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;

    return DAG.getNode(SPUISD::ROTBYTES_LEFT, V1.getValueType(),
                       V1, DAG.getConstant(rotamt, MVT::i16));
  } else {
    // Convert the SHUFFLE_VECTOR mask's input element units to the
    // actual bytes.
    unsigned BytesPerElement = EltVT.getSizeInBits()/8;

    SmallVector<SDValue, 16> ResultMask;
    for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
      unsigned SrcElt;
      if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
        SrcElt = 0;
      else
        SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();

      for (unsigned j = 0; j < BytesPerElement; ++j) {
        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
                                             MVT::i8));
      }
    }

    SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
                                    &ResultMask[0], ResultMask.size());
    return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
  }
}
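//! Lower ISD::SCALAR_TO_VECTOR
/*!
  A constant scalar is rebuilt as a constant splat vector, which later
  simplifies to a vector register load. Any other scalar is wrapped in
  PREFSLOT2VEC: on SPU the scalar already lives in its register's preferred
  slot, so the "promotion" is just a reinterpretation of the register as a
  vector.
 */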
static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
  SDValue Op0 = Op.getOperand(0);                   // Op0 = the scalar

  if (Op0.getNode()->getOpcode() == ISD::Constant) {
    // For a constant, build the appropriate constant vector, which will
    // eventually simplify to a vector register load.

    ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
    SmallVector<SDValue, 16> ConstVecValues;
    MVT VT;
    size_t n_copies;

    // Create a constant vector:
    switch (Op.getValueType().getSimpleVT()) {
    default: assert(0 && "Unexpected constant value type in "
                         "LowerSCALAR_TO_VECTOR");
    case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
    case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
    case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
    case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
    case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
    case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
    }

    SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
    for (size_t j = 0; j < n_copies; ++j)
      ConstVecValues.push_back(CValue);

    return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
                       &ConstVecValues[0], ConstVecValues.size());
  } else {
    // Otherwise, copy the value from one register to another:
    switch (Op0.getValueType().getSimpleVT()) {
    default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    case MVT::f32:
    case MVT::f64:
      return DAG.getNode(SPUISD::PREFSLOT2VEC, Op.getValueType(), Op0, Op0);
    }
  }

  return SDValue();
}
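//! Lower ISD::EXTRACT_VECTOR_ELT
/*!
  On SPU, a scalar occupies a fixed "preferred slot" of its 128-bit register:
  bytes 0-3 for 32-bit values, bytes 0-7 for 64-bit values, and the tail of
  the first word for i16 (bytes 2-3) and i8 (byte 3), matching the
  prefslot_begin/prefslot_end table below. Extraction is thus a byte shuffle
  that moves the requested element into the preferred slot; constant indices
  build the SHUFB mask directly, while variable indices rotate the element
  down to byte 0 and then replicate it across the register.
 */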
static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  SDValue N = Op.getOperand(0);
  SDValue Elt = Op.getOperand(1);
  SDValue retval;

  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
    // Constant argument:
    int EltNo = (int) C->getZExtValue();

    // Sanity check the element index against the vector width:
    if (VT == MVT::i8 && EltNo >= 16)
      assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
    else if (VT == MVT::i16 && EltNo >= 8)
      assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
    else if (VT == MVT::i32 && EltNo >= 4)
      assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
    else if (VT == MVT::i64 && EltNo >= 2)
      assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");

    if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
      // i32 and i64: Element 0 is the preferred slot
      return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, N);
    }

    // Need to generate shuffle mask and extract:
    int prefslot_begin = -1, prefslot_end = -1;
    int elt_byte = EltNo * VT.getSizeInBits() / 8;

    switch (VT.getSimpleVT()) {
    default:
      assert(false && "Invalid value type!");
    case MVT::i8: {
      prefslot_begin = prefslot_end = 3;
      break;
    }
    case MVT::i16: {
      prefslot_begin = 2; prefslot_end = 3;
      break;
    }
    case MVT::i32:
    case MVT::f32: {
      prefslot_begin = 0; prefslot_end = 3;
      break;
    }
    case MVT::i64:
    case MVT::f64: {
      prefslot_begin = 0; prefslot_end = 7;
      break;
    }
    }

    assert(prefslot_begin != -1 && prefslot_end != -1 &&
           "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");

    unsigned int ShufBytes[16];
    for (int i = 0; i < 16; ++i) {
      // zero fill upper part of preferred slot, don't care about the
      // other slots:
      unsigned int mask_val;
      if (i <= prefslot_end) {
        mask_val =
          ((i < prefslot_begin)
           ? 0x80
           : elt_byte + (i - prefslot_begin));

        ShufBytes[i] = mask_val;
      } else
        ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
    }

    SDValue ShufMask[4];
    for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
      unsigned bidx = i * 4;
      unsigned int bits = ((ShufBytes[bidx] << 24) |
                           (ShufBytes[bidx+1] << 16) |
                           (ShufBytes[bidx+2] << 8) |
                           ShufBytes[bidx+3]);
      ShufMask[i] = DAG.getConstant(bits, MVT::i32);
    }

    SDValue ShufMaskVec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                      &ShufMask[0],
                                      sizeof(ShufMask) / sizeof(ShufMask[0]));

    retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
                         DAG.getNode(SPUISD::SHUFB, N.getValueType(),
                                     N, N, ShufMaskVec));
  } else {
    // Variable index: Rotate the requested element into slot 0, then replicate
    // slot 0 across the vector
    MVT VecVT = N.getValueType();
    if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
      cerr << "LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit vector type!\n";
      abort();
    }

    // Make life easier by making sure the index is zero-extended to i32
    if (Elt.getValueType() != MVT::i32)
      Elt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Elt);

    // Scale the index to a byte shift quantity
    APInt scaleFactor =
      APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
    unsigned scaleShift = scaleFactor.logBase2();
    SDValue vecShift;

    if (scaleShift > 0) {
      // Scale the shift factor:
      Elt = DAG.getNode(ISD::SHL, MVT::i32, Elt,
                        DAG.getConstant(scaleShift, MVT::i32));
    }

    vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT, N, Elt);

    // Replicate the bytes starting at byte 0 across the entire vector (for
    // consistency with the notion of a unified register set)
    SDValue replicate;

    switch (VT.getSimpleVT()) {
    default:
      cerr << "LowerEXTRACT_VECTOR_ELT(variable): Unhandled vector type\n";
      abort();
      /*NOTREACHED*/
    case MVT::i8: {
      SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
                              factor, factor);
      break;
    }
    case MVT::i16: {
      SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
                              factor, factor);
      break;
    }
    case MVT::i32:
    case MVT::f32: {
      SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
                              factor, factor);
      break;
    }
    case MVT::i64:
    case MVT::f64: {
      SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
      SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, loFactor, hiFactor,
                              loFactor, hiFactor);
      break;
    }
    }

    retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
                         DAG.getNode(SPUISD::SHUFB, VecVT,
                                     vecShift, vecShift, replicate));
  }

  return retval;
}
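//! Lower ISD::INSERT_VECTOR_ELT
/*!
  Insertion is done with a SHUFFLE_MASK (one of the C*D control-word
  instructions) computed from an address: $sp is used purely because it is
  guaranteed 16-byte aligned, so the constant index added to it supplies the
  insertion offset in the generated control word. SHUFB then merges the
  scalar into the target vector.
 */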
static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
  SDValue VecOp = Op.getOperand(0);
  SDValue ValOp = Op.getOperand(1);
  SDValue IdxOp = Op.getOperand(2);
  MVT VT = Op.getValueType();

  ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
  assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");

  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  // Use $sp ($1) because it's always 16-byte aligned and it's available:
  SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
                                DAG.getRegister(SPU::R1, PtrVT),
                                DAG.getConstant(CN->getSExtValue(), PtrVT));
  SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, VT, Pointer);

  SDValue result =
    DAG.getNode(SPUISD::SHUFB, VT,
                DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
                VecOp,
                DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, ShufMask));

  return result;
}
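//! Lower i8 math operations by promoting to i16
/*!
  The SPU has no 8-bit arithmetic, so every i8 operator is widened to i16,
  computed there, and truncated back to i8. Note the rotate cases: an i8
  rotate is simulated by duplicating the byte into both halves of an i16
  (x | (x << 8)) so that bits rotated out of one copy are supplied by the
  other; e.g., rotating 0xB1 left by 4 operates on 0xB1B1 and truncates to
  0x1B.
 */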
static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
                           const TargetLowering &TLI)
{
  SDValue N0 = Op.getOperand(0);      // Everything has at least one operand
  MVT ShiftVT = TLI.getShiftAmountTy();

  assert(Op.getValueType() == MVT::i8);
  switch (Opc) {
  default:
    assert(0 && "Unhandled i8 math operator");
    /*NOTREACHED*/
    break;
  case ISD::ADD: {
    // 8-bit addition: Promote the arguments up to 16-bits and truncate
    // the result:
    SDValue N1 = Op.getOperand(1);
    N0 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0);
    N1 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1);
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }

  case ISD::SUB: {
    // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
    // the result:
    SDValue N1 = Op.getOperand(1);
    N0 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0);
    N1 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1);
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  case ISD::ROTR:
  case ISD::ROTL: {
    SDValue N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
                            MVT::i16));
    N1Opc = N1.getValueType().bitsLT(ShiftVT)
            ? ISD::ZERO_EXTEND
            : ISD::TRUNCATE;
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, ShiftVT, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
                            ShiftVT));
    // Duplicate the byte so that bits rotated out of one copy re-enter from
    // the other:
    SDValue ExpandArg =
      DAG.getNode(ISD::OR, MVT::i16, N0,
                  DAG.getNode(ISD::SHL, MVT::i16,
                              N0, DAG.getConstant(8, MVT::i32)));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
  }
  case ISD::SRL:
  case ISD::SHL: {
    SDValue N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
                            MVT::i16));
    N1Opc = N1.getValueType().bitsLT(ShiftVT)
            ? ISD::ZERO_EXTEND
            : ISD::TRUNCATE;
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, ShiftVT, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(), ShiftVT));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  case ISD::SRA: {
    SDValue N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getSExtValue(),
                            MVT::i16));
    N1Opc = N1.getValueType().bitsLT(ShiftVT)
            ? ISD::SIGN_EXTEND
            : ISD::TRUNCATE;
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, ShiftVT, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
                            ShiftVT));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  case ISD::MUL: {
    SDValue N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
                            MVT::i16));
    N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getSExtValue(),
                            MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  }

  return SDValue();
}
//! Generate the carry-generate shuffle mask.
SDValue SPU::getCarryGenerateShufMask(SelectionDAG &DAG) {
  SmallVector<SDValue, 16> ShufBytes;

  // Create the shuffle mask for "rotating" the carry up one register slot
  // once the carry is generated.
  ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
  ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
  ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
  ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));

  return DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                     &ShufBytes[0], ShufBytes.size());
}
//! Generate the borrow-generate shuffle mask.
SDValue SPU::getBorrowGenerateShufMask(SelectionDAG &DAG) {
  SmallVector<SDValue, 16> ShufBytes;

  // Create the shuffle mask for "rotating" the borrow up one register slot
  // once the borrow is generated.
  ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
  ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
  ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
  ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));

  return DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                     &ShufBytes[0], ShufBytes.size());
}
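// The two masks above move the word-sized carry/borrow computed in each
// doubleword's low word up into the high-word position. The remaining bytes
// are forced to 0x00 (0x80 control bytes) for the carry mask and 0xff (0xc0
// control bytes) for the borrow mask; they are consumed by the 64-bit
// add/subtract expansions elsewhere in this target.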
//! Lower byte immediate operations for v16i8 vectors:
static SDValue
LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
  SDValue ConstVec;
  SDValue Arg;
  MVT VT = Op.getValueType();

  ConstVec = Op.getOperand(0);
  Arg = Op.getOperand(1);
  if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
    if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
      ConstVec = ConstVec.getOperand(0);
    } else {
      ConstVec = Op.getOperand(1);
      Arg = Op.getOperand(0);
      if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
        ConstVec = ConstVec.getOperand(0);
      }
    }
  }

  if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
    uint64_t VectorBits[2];
    uint64_t UndefBits[2];
    uint64_t SplatBits, SplatUndef;
    int SplatSize;

    if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
        && isConstantSplat(VectorBits, UndefBits,
                           VT.getVectorElementType().getSizeInBits(),
                           SplatBits, SplatUndef, SplatSize)) {
      SDValue tcVec[16];
      SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
      const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);

      // Turn the BUILD_VECTOR into a set of target constants:
      for (size_t i = 0; i < tcVecSize; ++i)
        tcVec[i] = tc;

      return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
                         DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
    }
  }

  // These operations (AND, OR, XOR) are legal; they just couldn't be custom
  // lowered. Return the operation, rather than a null SDValue.
  return Op;
}
//! Custom lowering for CTPOP (count population)
/*!
  Custom lowering code that counts the number of ones in the input
  operand. SPU has such an instruction, but it counts the number of
  ones per byte, which then have to be accumulated.
 */
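// Accumulation sketch for the i32 case below: CNTB leaves a per-byte ones
// count in each byte. Adding the word to itself shifted right by 16 and then
// by 8 funnels all four byte counts into the low byte, and the final AND with
// 0xff discards the partial sums left in the upper bytes. For example,
// per-byte counts 0x01020304 -> 0x01020406 -> 0x0103060a -> & 0xff = 0x0a.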
static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));

  switch (VT.getSimpleVT()) {
  default:
    assert(false && "Invalid value type!");
  case MVT::i8: {
    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i32);

    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
  }

  case MVT::i16: {
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();

    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);

    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i16);
    SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
    SDValue Shift1 = DAG.getConstant(8, MVT::i32);

    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDValue CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);

    SDValue CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);

    SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);

    return DAG.getNode(ISD::AND, MVT::i16,
                       DAG.getNode(ISD::ADD, MVT::i16,
                                   DAG.getNode(ISD::SRL, MVT::i16,
                                               Tmp1, Shift1),
                                   Tmp1),
                       Mask0);
  }

  case MVT::i32: {
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();

    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
    unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);

    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i32);
    SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
    SDValue Shift1 = DAG.getConstant(16, MVT::i32);
    SDValue Shift2 = DAG.getConstant(8, MVT::i32);

    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDValue CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);

    SDValue CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);

    SDValue Comp1 =
      DAG.getNode(ISD::SRL, MVT::i32,
                  DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);

    SDValue Sum1 =
      DAG.getNode(ISD::ADD, MVT::i32,
                  Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));

    SDValue Sum1_rescopy =
      DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);

    SDValue Comp2 =
      DAG.getNode(ISD::SRL, MVT::i32,
                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
                  Shift2);
    SDValue Sum2 =
      DAG.getNode(ISD::ADD, MVT::i32, Comp2,
                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));

    return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
  }

  case MVT::i64:
    break;
  }

  return SDValue();
}
//! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
/*!
  f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
  All conversions to i64 are expanded to a libcall.
 */
static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
                              SPUTargetLowering &TLI) {
  MVT OpVT = Op.getValueType();
  SDValue Op0 = Op.getOperand(0);
  MVT Op0VT = Op0.getValueType();

  if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
      || OpVT == MVT::i64) {
    // Convert f32 / f64 to i32 / i64 via libcall.
    RTLIB::Libcall LC =
      (Op.getOpcode() == ISD::FP_TO_SINT)
      ? RTLIB::getFPTOSINT(Op0VT, OpVT)
      : RTLIB::getFPTOUINT(Op0VT, OpVT);
    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-int conversion!");
    SDValue Dummy;
    return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
  }

  return Op;                    // return unmolested, legalized op
}
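// For reference (standard runtime-library naming, not guaranteed by this
// file): RTLIB::getFPTOSINT(f64, i32) resolves to the usual __fixdfsi-style
// helper, so a plain "(int) some_double" cast becomes a call on SPU rather
// than inline code.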
//! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
/*!
  i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
  All conversions from i64 are expanded to a libcall.
 */
static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
                              SPUTargetLowering &TLI) {
  MVT OpVT = Op.getValueType();
  SDValue Op0 = Op.getOperand(0);
  MVT Op0VT = Op0.getValueType();

  if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
      || Op0VT == MVT::i64) {
    // Convert i32, i64 to f64 via libcall:
    RTLIB::Libcall LC =
      (Op.getOpcode() == ISD::SINT_TO_FP)
      ? RTLIB::getSINTTOFP(Op0VT, OpVT)
      : RTLIB::getUINTTOFP(Op0VT, OpVT);
    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected int-to-fp conversion!");
    SDValue Dummy;
    return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
  }

  return Op;                    // return unmolested, legalized
}
//! Lower ISD::SETCC
/*!
  This handles MVT::f64 (double floating point) condition lowering.
 */
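//
// Strategy sketch: each f64 is reinterpreted as its i64 bit pattern. Negative
// values x are remapped to (0x8000000000000000 - x), which converts IEEE
// sign-magnitude ordering into two's complement ordering, so a single signed
// integer compare then ranks the original doubles correctly. NaNs are not
// handled by that trick, which is why the ordered predicates AND in explicit
// not-NaN tests at the end.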
static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
                          const TargetLowering &TLI) {
  CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
  DebugLoc dl = Op.getNode()->getDebugLoc();
  assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");

  SDValue lhs = Op.getOperand(0);
  SDValue rhs = Op.getOperand(1);
  MVT lhsVT = lhs.getValueType();
  assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::f64\n");

  MVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
  APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
  MVT IntVT(MVT::i64);

  // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
  // selected to a NOP:
  SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs);
  SDValue lhsHi32 =
    DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
                DAG.getNode(ISD::SRL, dl, IntVT,
                            i64lhs, DAG.getConstant(32, MVT::i32)));
  SDValue lhsHi32abs =
    DAG.getNode(ISD::AND, dl, MVT::i32,
                lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
  SDValue lhsLo32 =
    DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);

  // SETO and SETUO only use the lhs operand:
  if (CC->get() == ISD::SETO) {
    // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
    // SETUO:
    APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
    return DAG.getNode(ISD::XOR, dl, ccResultVT,
                       DAG.getSetCC(dl, ccResultVT,
                                    lhs, DAG.getConstantFP(0.0, lhsVT),
                                    ISD::SETUO),
                       DAG.getConstant(ccResultAllOnes, ccResultVT));
  } else if (CC->get() == ISD::SETUO) {
    // Evaluates to true if Op0 is [SQ]NaN
    return DAG.getNode(ISD::AND, dl, ccResultVT,
                       DAG.getSetCC(dl, ccResultVT,
                                    lhsHi32abs,
                                    DAG.getConstant(0x7ff00000, MVT::i32),
                                    ISD::SETGE),
                       DAG.getSetCC(dl, ccResultVT,
                                    lhsLo32,
                                    DAG.getConstant(0, MVT::i32),
                                    ISD::SETGT));
  }

  SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs);
  SDValue rhsHi32 =
    DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
                DAG.getNode(ISD::SRL, dl, IntVT,
                            i64rhs, DAG.getConstant(32, MVT::i32)));

  // If a value is negative, subtract from the sign magnitude constant:
  SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);

  // Convert the sign-magnitude representation into 2's complement:
  SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
                                      lhsHi32, DAG.getConstant(31, MVT::i32));
  SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
  SDValue lhsSelect =
    DAG.getNode(ISD::SELECT, dl, IntVT,
                lhsSelectMask, lhsSignMag2TC, i64lhs);

  SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
                                      rhsHi32, DAG.getConstant(31, MVT::i32));
  SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
  SDValue rhsSelect =
    DAG.getNode(ISD::SELECT, dl, IntVT,
                rhsSelectMask, rhsSignMag2TC, i64rhs);

  unsigned compareOp;

  switch (CC->get()) {
  case ISD::SETOEQ:
  case ISD::SETUEQ:
    compareOp = ISD::SETEQ; break;
  case ISD::SETOGT:
  case ISD::SETUGT:
    compareOp = ISD::SETGT; break;
  case ISD::SETOGE:
  case ISD::SETUGE:
    compareOp = ISD::SETGE; break;
  case ISD::SETOLT:
  case ISD::SETULT:
    compareOp = ISD::SETLT; break;
  case ISD::SETOLE:
  case ISD::SETULE:
    compareOp = ISD::SETLE; break;
  case ISD::SETONE:
  case ISD::SETUNE:
    compareOp = ISD::SETNE; break;
  default:
    cerr << "CellSPU ISel Select: unimplemented f64 condition\n";
    abort();
    break;
  }

  SDValue result =
    DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
                 (ISD::CondCode) compareOp);

  if ((CC->get() & 0x8) == 0) {
    // Ordered comparison:
    SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
                                  lhs, DAG.getConstantFP(0.0, MVT::f64),
                                  ISD::SETO);
    SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
                                  rhs, DAG.getConstantFP(0.0, MVT::f64),
                                  ISD::SETO);
    SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);

    result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
  }

  return result;
}
//! Lower ISD::SELECT_CC
/*!
  ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
  SELB instruction.

  \note Need to revisit this in the future: if the code path through the true
  and false value computations is longer than the latency of a branch (6
  cycles), then it would be more advantageous to branch and insert a new basic
  block and branch on the condition. However, this code does not make that
  assumption, given the simplistic uses so far.
 */
static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
                              const TargetLowering &TLI) {
  MVT VT = Op.getValueType();
  SDValue lhs = Op.getOperand(0);
  SDValue rhs = Op.getOperand(1);
  SDValue trueval = Op.getOperand(2);
  SDValue falseval = Op.getOperand(3);
  SDValue condition = Op.getOperand(4);

  // NOTE: SELB's arguments: $rA, $rB, $mask
  //
  // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
  // where bits in $mask are 1. CCond will be inverted, having 1s where the
  // condition was true and 0s where the condition was false. Hence, the
  // arguments to SELB get reversed.

  // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
  // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
  // with another "cannot select select_cc" assert:
  SDValue compare = DAG.getNode(ISD::SETCC,
                                TLI.getSetCCResultType(Op.getValueType()),
                                lhs, rhs, condition);
  return DAG.getNode(SPUISD::SELB, VT, falseval, trueval, compare);
}
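// Illustration: (select_cc lhs, rhs, t, f, SETGT) becomes
// (SELB f, t, (setcc lhs, rhs, SETGT)). The setcc mask is all ones exactly
// where the condition held, and SELB takes bits from its second operand where
// the mask is 1, so the true value must be passed second.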
//! Custom lower ISD::TRUNCATE
static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
{
  MVT VT = Op.getValueType();
  MVT::SimpleValueType simpleVT = VT.getSimpleVT();
  MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));

  SDValue Op0 = Op.getOperand(0);
  MVT Op0VT = Op0.getValueType();
  MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));

  if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
    // Create shuffle mask, least significant doubleword of quadword
    unsigned maskHigh = 0x08090a0b;
    unsigned maskLow = 0x0c0d0e0f;
    // Use a shuffle to perform the truncation
    SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                   DAG.getConstant(maskHigh, MVT::i32),
                                   DAG.getConstant(maskLow, MVT::i32),
                                   DAG.getConstant(maskHigh, MVT::i32),
                                   DAG.getConstant(maskLow, MVT::i32));

    SDValue PromoteScalar = DAG.getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0);

    SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT,
                                       PromoteScalar, PromoteScalar, shufMask);

    return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
                       DAG.getNode(ISD::BIT_CONVERT, VecVT, truncShuffle));
  }

  return SDValue();             // Leave the truncate unmolested
}
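// In LowerTRUNCATE above, the mask words 0x08090a0b/0x0c0d0e0f name source
// bytes 8-15, i.e. the least significant doubleword of the big-endian i128,
// so the shuffle lands that doubleword in the i64 preferred slot (bytes 0-7);
// duplicating the mask pair simply keeps the remaining bytes well-defined.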
//! Custom (target-specific) lowering entry point
/*!
  This is where LLVM's DAG selection process calls to do target-specific
  lowering of nodes.
 */
SDValue
SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
{
  unsigned Opc = (unsigned) Op.getOpcode();
  MVT VT = Op.getValueType();

  switch (Opc) {
  default: {
    cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
    cerr << "Op.getOpcode() = " << Opc << "\n";
    cerr << "*Op.getNode():\n";
    Op.getNode()->dump();
    abort();
  }
  case ISD::LOAD:
  case ISD::EXTLOAD:
  case ISD::SEXTLOAD:
  case ISD::ZEXTLOAD:
    return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::STORE:
    return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::ConstantPool:
    return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::JumpTable:
    return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::ConstantFP:
    return LowerConstantFP(Op, DAG);
  case ISD::FORMAL_ARGUMENTS:
    return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
  case ISD::CALL:
    return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::RET:
    return LowerRET(Op, DAG, getTargetMachine());

  // i8, i64 math ops:
  case ISD::ADD:
  case ISD::SUB:
  case ISD::ROTR:
  case ISD::ROTL:
  case ISD::SRL:
  case ISD::SHL:
  case ISD::SRA: {
    if (VT == MVT::i8)
      return LowerI8Math(Op, DAG, Opc, *this);
    break;
  }

  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
    return LowerFP_TO_INT(Op, DAG, *this);

  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
    return LowerINT_TO_FP(Op, DAG, *this);

  // Vector-related lowering.
  case ISD::BUILD_VECTOR:
    return LowerBUILD_VECTOR(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:
    return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return LowerINSERT_VECTOR_ELT(Op, DAG);

  // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
    return LowerByteImmed(Op, DAG);

  // Vector and i8 multiply:
  case ISD::MUL:
    if (VT == MVT::i8)
      return LowerI8Math(Op, DAG, Opc, *this);
    break;

  case ISD::CTPOP:
    return LowerCTPOP(Op, DAG);

  case ISD::SELECT_CC:
    return LowerSELECT_CC(Op, DAG, *this);

  case ISD::SETCC:
    return LowerSETCC(Op, DAG, *this);

  case ISD::TRUNCATE:
    return LowerTRUNCATE(Op, DAG);
  }

  return SDValue();
}
void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue>&Results,
                                           SelectionDAG &DAG)
{
#if 0
  unsigned Opc = (unsigned) N->getOpcode();
  MVT OpVT = N->getValueType(0);

  switch (Opc) {
  default: {
    cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
    cerr << "Op.getOpcode() = " << Opc << "\n";
    cerr << "*Op.getNode():\n";
    N->dump();
    abort();
    /*NOTREACHED*/
  }
  }
#endif

  /* Otherwise, return unchanged */
}
//===----------------------------------------------------------------------===//
// Target Optimization Hooks
//===----------------------------------------------------------------------===//
SDValue
SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
{
#if 0
  TargetMachine &TM = getTargetMachine();
#endif
  const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
  SelectionDAG &DAG = DCI.DAG;
  SDValue Op0 = N->getOperand(0);       // everything has at least one operand
  MVT NodeVT = N->getValueType(0);      // The node's value type
  MVT Op0VT = Op0.getValueType();       // The first operand's result
  SDValue Result;                       // Initially, empty result

  switch (N->getOpcode()) {
  default: break;
  case ISD::ADD: {
    SDValue Op1 = N->getOperand(1);

    if (Op0.getOpcode() == SPUISD::IndirectAddr
        || Op1.getOpcode() == SPUISD::IndirectAddr) {
      // Normalize the operands to reduce repeated code
      SDValue IndirectArg = Op0, AddArg = Op1;

      if (Op1.getOpcode() == SPUISD::IndirectAddr) {
        IndirectArg = Op1;
        AddArg = Op0;
      }

      if (isa<ConstantSDNode>(AddArg)) {
        ConstantSDNode *CN0 = cast<ConstantSDNode>(AddArg);
        SDValue IndOp1 = IndirectArg.getOperand(1);

        if (CN0->isNullValue()) {
          // (add (SPUindirect <arg>, <arg>), 0) ->
          // (SPUindirect <arg>, <arg>)
#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
            cerr << "\n"
                 << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
                 << "With:    (SPUindirect <arg>, <arg>)\n";
          }
#endif

          return IndirectArg;
        } else if (isa<ConstantSDNode>(IndOp1)) {
          // (add (SPUindirect <arg>, <const>), <const>) ->
          // (SPUindirect <arg>, <const + const>)
          ConstantSDNode *CN1 = cast<ConstantSDNode>(IndOp1);
          int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
          SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);

#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
            cerr << "\n"
                 << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
                 << "), " << CN0->getSExtValue() << ")\n"
                 << "With:    (SPUindirect <arg>, "
                 << combinedConst << ")\n";
          }
#endif

          return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
                             IndirectArg, combinedValue);
        }
      }
    }
    break;
  }
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND: {
    if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
      // (any_extend (SPUextract_elt0 <arg>)) ->
      // (SPUextract_elt0 <arg>)
      // Types must match, however...
#if !defined(NDEBUG)
      if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
        cerr << "\nReplace: ";
        N->dump(&DAG);
        cerr << "\nWith:    ";
        Op0.getNode()->dump(&DAG);
        cerr << "\n";
      }
#endif

      return Op0;
    }
    break;
  }
  case SPUISD::IndirectAddr: {
    if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
      ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
      if (CN != 0 && CN->getZExtValue() == 0) {
        // (SPUindirect (SPUaform <addr>, 0), 0) ->
        // (SPUaform <addr>, 0)
        DEBUG(cerr << "Replace: ");
        DEBUG(N->dump(&DAG));
        DEBUG(cerr << "\nWith:    ");
        DEBUG(Op0.getNode()->dump(&DAG));
        DEBUG(cerr << "\n");

        return Op0;
      }
    } else if (Op0.getOpcode() == ISD::ADD) {
      SDValue Op1 = N->getOperand(1);
      if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
        // (SPUindirect (add <arg>, <arg>), 0) ->
        // (SPUindirect <arg>, <arg>)
        if (CN1->isNullValue()) {
#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
            cerr << "\n"
                 << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
                 << "With:    (SPUindirect <arg>, <arg>)\n";
          }
#endif

          return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
                             Op0.getOperand(0), Op0.getOperand(1));
        }
      }
    }
    break;
  }
  case SPUISD::SHLQUAD_L_BITS:
  case SPUISD::SHLQUAD_L_BYTES:
  case SPUISD::VEC_SHL:
  case SPUISD::VEC_SRL:
  case SPUISD::VEC_SRA:
  case SPUISD::ROTBYTES_LEFT: {
    SDValue Op1 = N->getOperand(1);

    // Kill degenerate vector shifts:
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
      if (CN->isNullValue()) {
        Result = Op0;
      }
    }
    break;
  }
  case SPUISD::PREFSLOT2VEC: {
    switch (Op0.getOpcode()) {
    default:
      break;
    case ISD::ANY_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::SIGN_EXTEND: {
      // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
      // <arg>
      // but only if the SPUprefslot2vec and <arg> types match.
      SDValue Op00 = Op0.getOperand(0);
      if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
        SDValue Op000 = Op00.getOperand(0);
        if (Op000.getValueType() == NodeVT) {
          Result = Op000;
        }
      }
      break;
    }
    case SPUISD::VEC2PREFSLOT: {
      // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
      // <arg>
      Result = Op0.getOperand(0);
      break;
    }
    }
    break;
  }
  }

  // Otherwise, return unchanged.
  if (Result.getNode()) {
    DEBUG(cerr << "\nReplace.SPU: ");
    DEBUG(N->dump(&DAG));
    DEBUG(cerr << "\nWith:        ");
    DEBUG(Result.getNode()->dump(&DAG));
    DEBUG(cerr << "\n");
  }

  return Result;
}
//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
SPUTargetLowering::ConstraintType
SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
  if (ConstraintLetter.size() == 1) {
    switch (ConstraintLetter[0]) {
    default: break;
    case 'b':
    case 'r':
    case 'f':
    case 'v':
    case 'y':
      return C_RegisterClass;
    }
  }
  return TargetLowering::getConstraintType(ConstraintLetter);
}
std::pair<unsigned, const TargetRegisterClass*>
SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                MVT VT) const
{
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {
    case 'b':   // R1-R31
    case 'r':   // R0-R31
      if (VT == MVT::i64)
        return std::make_pair(0U, SPU::R64CRegisterClass);
      return std::make_pair(0U, SPU::R32CRegisterClass);
    case 'f':
      if (VT == MVT::f32)
        return std::make_pair(0U, SPU::R32FPRegisterClass);
      else if (VT == MVT::f64)
        return std::make_pair(0U, SPU::R64FPRegisterClass);
      break;
    case 'v':
      return std::make_pair(0U, SPU::GPRCRegisterClass);
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}
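// Usage note (illustrative, not taken from this file): an inline asm such as
//   asm("ai %0, %1, 4" : "=r"(out) : "r"(in));
// reaches getRegForInlineAsmConstraint above with Constraint == "r", and the
// register class returned here determines which SPU register file the
// operand is allocated from.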
//! Compute used/known bits for a SPU operand
void
SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
                                                  const APInt &Mask,
                                                  APInt &KnownZero,
                                                  APInt &KnownOne,
                                                  const SelectionDAG &DAG,
                                                  unsigned Depth) const {
#if 0
  const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;

  switch (Op.getOpcode()) {
  default:
    // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
    break;
  case SPUISD::CALL:
  case SPUISD::SHUFB:
  case SPUISD::SHUFFLE_MASK:
  case SPUISD::CNTB:
  case SPUISD::PREFSLOT2VEC:
  case SPUISD::LDRESULT:
  case SPUISD::VEC2PREFSLOT:
  case SPUISD::SHLQUAD_L_BITS:
  case SPUISD::SHLQUAD_L_BYTES:
  case SPUISD::VEC_SHL:
  case SPUISD::VEC_SRL:
  case SPUISD::VEC_SRA:
  case SPUISD::VEC_ROTL:
  case SPUISD::VEC_ROTR:
  case SPUISD::ROTBYTES_LEFT:
  case SPUISD::SELECT_MASK:
  case SPUISD::SELB:
    break;
  }
#endif
}
unsigned
SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
                                                   unsigned Depth) const {
  switch (Op.getOpcode()) {
  default:
    return 1;
  case ISD::SETCC: {
    MVT VT = Op.getValueType();

    if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
      VT = MVT::i32;
    }
    return VT.getSizeInBits();
  }
  }
}
// LowerAsmOperandForConstraint
void
SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                char ConstraintLetter,
                                                bool hasMemory,
                                                std::vector<SDValue> &Ops,
                                                SelectionDAG &DAG) const {
  // Default, for the time being, to the base class handler
  TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
                                               Ops, DAG);
}
/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode.
bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
                                                const Type *Ty) const {
  // SPU's addresses are 256K:
  return (V > -(1 << 18) && V < (1 << 18) - 1);
}

bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
  return false;
}
bool
SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The SPU target isn't yet aware of offsets.
  return false;
}