1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the SPUTargetLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "SPURegisterNames.h"
15 #include "SPUISelLowering.h"
16 #include "SPUTargetMachine.h"
17 #include "SPUFrameInfo.h"
18 #include "llvm/ADT/APInt.h"
19 #include "llvm/ADT/VectorExtras.h"
20 #include "llvm/CallingConv.h"
21 #include "llvm/CodeGen/CallingConvLower.h"
22 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/MachineFunction.h"
24 #include "llvm/CodeGen/MachineInstrBuilder.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/CodeGen/SelectionDAG.h"
27 #include "llvm/Constants.h"
28 #include "llvm/Function.h"
29 #include "llvm/Intrinsics.h"
30 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/MathExtras.h"
32 #include "llvm/Target/TargetOptions.h"
38 // Used in getTargetNodeName() below
40 std::map<unsigned, const char *> node_names;
42 //! MVT mapping to useful data for Cell SPU
43 struct valtype_map_s {
45 const int prefslot_byte;
48 const valtype_map_s valtype_map[] = {
59 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
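// A note on prefslot_byte: the SPU keeps a scalar in a fixed "preferred slot"
// of its 128-bit register, and prefslot_byte records the byte at which that
// slot starts for each type.  Illustrative values (an assumption; the table
// entries themselves are not shown here): byte 3 for i8, byte 2 for i16, and
// byte 0 for i32/i64/f32/f64.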
61 const valtype_map_s *getValueTypeMapEntry(MVT VT) {
62 const valtype_map_s *retval = 0;
64 for (size_t i = 0; i < n_valtype_map; ++i) {
65 if (valtype_map[i].valtype == VT) {
66 retval = valtype_map + i;
73 cerr << "getValueTypeMapEntry returns NULL for "
83 //! Expand a library call into an actual call DAG node
86 This code is taken from SelectionDAGLegalize, since it is not exposed as
87 part of the LLVM SelectionDAG API.
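 As an illustration (an assumption based on the constructor below, which maps
 RTLIB::DIV_F64 to "__fast_divdf3"): when such a libcall is expanded, the
 operation's operands become ArgListEntry records and LowerCallTo emits the
 actual call sequence, whose first result is handed back to the caller.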
91 ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
92 bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) {
93 // The input chain to this libcall is the entry node of the function.
94 // Legalizing the call will automatically add the previous call to the dependence.
96 SDValue InChain = DAG.getEntryNode();
98 TargetLowering::ArgListTy Args;
99 TargetLowering::ArgListEntry Entry;
100 for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
101 MVT ArgVT = Op.getOperand(i).getValueType();
102 const Type *ArgTy = ArgVT.getTypeForMVT();
103 Entry.Node = Op.getOperand(i);
104 Entry.Ty = ArgTy;
105 Entry.isSExt = isSigned;
106 Entry.isZExt = !isSigned;
107 Args.push_back(Entry);
109 SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
110 TLI.getPointerTy());
112 // Splice the libcall in wherever FindInputOutputChains tells us to.
113 const Type *RetTy = Op.getNode()->getValueType(0).getTypeForMVT();
114 std::pair<SDValue, SDValue> CallInfo =
115 TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
116 CallingConv::C, false, Callee, Args, DAG,
119 return CallInfo.first;
123 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
124 : TargetLowering(TM),
127 // Fold away setcc operations if possible.
130 // Use _setjmp/_longjmp instead of setjmp/longjmp.
131 setUseUnderscoreSetJmp(true);
132 setUseUnderscoreLongJmp(true);
134 // Set RTLIB libcall names as used by SPU:
135 setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");
137 // Set up the SPU's register classes:
138 addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
139 addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
140 addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
141 addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
142 addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
143 addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
144 addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
146 // SPU has no sign or zero extended loads for i1, i8, i16:
147 setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
148 setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
149 setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
151 setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
152 setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
154 // SPU constant load actions are custom lowered:
155 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
156 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
158 // SPU's loads and stores have to be custom lowered:
159 for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
161 MVT VT = (MVT::SimpleValueType)sctype;
163 setOperationAction(ISD::LOAD, VT, Custom);
164 setOperationAction(ISD::STORE, VT, Custom);
165 setLoadExtAction(ISD::EXTLOAD, VT, Custom);
166 setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
167 setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
169 for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
170 MVT StoreVT = (MVT::SimpleValueType) stype;
171 setTruncStoreAction(VT, StoreVT, Expand);
175 for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
177 MVT VT = (MVT::SimpleValueType) sctype;
179 setOperationAction(ISD::LOAD, VT, Custom);
180 setOperationAction(ISD::STORE, VT, Custom);
182 for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
183 MVT StoreVT = (MVT::SimpleValueType) stype;
184 setTruncStoreAction(VT, StoreVT, Expand);
188 // Expand the jumptable branches
189 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
190 setOperationAction(ISD::BR_CC, MVT::Other, Expand);
192 // Custom lower SELECT_CC for most cases, but expand by default
193 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
194 setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
195 setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
196 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
197 setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
199 // SPU has no intrinsics for these particular operations:
200 setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
202 // SPU has no SREM/UREM instructions
203 setOperationAction(ISD::SREM, MVT::i32, Expand);
204 setOperationAction(ISD::UREM, MVT::i32, Expand);
205 setOperationAction(ISD::SREM, MVT::i64, Expand);
206 setOperationAction(ISD::UREM, MVT::i64, Expand);
208 // We don't support sin/cos/sqrt/fmod
209 setOperationAction(ISD::FSIN , MVT::f64, Expand);
210 setOperationAction(ISD::FCOS , MVT::f64, Expand);
211 setOperationAction(ISD::FREM , MVT::f64, Expand);
212 setOperationAction(ISD::FSIN , MVT::f32, Expand);
213 setOperationAction(ISD::FCOS , MVT::f32, Expand);
214 setOperationAction(ISD::FREM , MVT::f32, Expand);
216 // Expand fsqrt to the appropriate libcall (NOTE: should use the h/w fsqrt instruction where available)
218 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
219 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
221 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
222 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
224 // SPU can do rotate right and left, so legalize it... but customize for i8
225 // because instructions don't exist.
227 // FIXME: Change from "expand" to appropriate type once ROTR is supported in the .td file.
229 setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
230 setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
231 setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/);
233 setOperationAction(ISD::ROTL, MVT::i32, Legal);
234 setOperationAction(ISD::ROTL, MVT::i16, Legal);
235 setOperationAction(ISD::ROTL, MVT::i8, Custom);
237 // SPU has no native version of shift left/right for i8
238 setOperationAction(ISD::SHL, MVT::i8, Custom);
239 setOperationAction(ISD::SRL, MVT::i8, Custom);
240 setOperationAction(ISD::SRA, MVT::i8, Custom);
242 // Make these operations legal and handle them during instruction selection:
243 setOperationAction(ISD::SHL, MVT::i64, Legal);
244 setOperationAction(ISD::SRL, MVT::i64, Legal);
245 setOperationAction(ISD::SRA, MVT::i64, Legal);
247 // Custom lower i8, i32 and i64 multiplications
248 setOperationAction(ISD::MUL, MVT::i8, Custom);
249 setOperationAction(ISD::MUL, MVT::i32, Legal);
250 setOperationAction(ISD::MUL, MVT::i64, Legal);
252 // Need to custom handle (some) common i8, i64 math ops
253 setOperationAction(ISD::ADD, MVT::i8, Custom);
254 setOperationAction(ISD::ADD, MVT::i64, Legal);
255 setOperationAction(ISD::SUB, MVT::i8, Custom);
256 setOperationAction(ISD::SUB, MVT::i64, Legal);
258 // SPU does not have BSWAP. It does support CTLZ for i32.
259 // CTPOP has to be custom lowered.
260 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
261 setOperationAction(ISD::BSWAP, MVT::i64, Expand);
263 setOperationAction(ISD::CTPOP, MVT::i8, Custom);
264 setOperationAction(ISD::CTPOP, MVT::i16, Custom);
265 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
266 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
268 setOperationAction(ISD::CTTZ , MVT::i32, Expand);
269 setOperationAction(ISD::CTTZ , MVT::i64, Expand);
271 setOperationAction(ISD::CTLZ , MVT::i32, Legal);
273 // SPU has a version of select that implements (a&~c)|(b&c), just like
274 // select ought to work:
275 setOperationAction(ISD::SELECT, MVT::i8, Legal);
276 setOperationAction(ISD::SELECT, MVT::i16, Legal);
277 setOperationAction(ISD::SELECT, MVT::i32, Legal);
278 setOperationAction(ISD::SELECT, MVT::i64, Legal);
280 setOperationAction(ISD::SETCC, MVT::i8, Legal);
281 setOperationAction(ISD::SETCC, MVT::i16, Legal);
282 setOperationAction(ISD::SETCC, MVT::i32, Legal);
283 setOperationAction(ISD::SETCC, MVT::i64, Legal);
284 setOperationAction(ISD::SETCC, MVT::f64, Custom);
286 // Custom lower i128 -> i64 truncates
287 setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
289 // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
290 // to expand to a libcall, hence the custom lowering:
291 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
292 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
294 // FDIV on SPU requires custom lowering
295 setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall
297 // SPU has [U|S]INT_TO_FP for i32->f32, but not for conversions from i64 or to f64:
298 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
299 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
300 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
301 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
302 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
303 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
304 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
305 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
307 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
308 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
309 setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
310 setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
312 // We cannot sextinreg(i1). Expand to shifts.
313 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
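// (Under Expand, a sign_extend_inreg from i1 becomes a shift pair; e.g. for a
// value held in an i32 register: shl by 31 followed by sra by 31.)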
315 // Support label based line numbers.
316 setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
317 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
319 // We want to legalize GlobalAddress and ConstantPool nodes into the
320 // appropriate instructions to materialize the address.
321 for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
323 MVT VT = (MVT::SimpleValueType)sctype;
325 setOperationAction(ISD::GlobalAddress, VT, Custom);
326 setOperationAction(ISD::ConstantPool, VT, Custom);
327 setOperationAction(ISD::JumpTable, VT, Custom);
330 // RET must be custom lowered, to meet ABI requirements
331 setOperationAction(ISD::RET, MVT::Other, Custom);
333 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
334 setOperationAction(ISD::VASTART , MVT::Other, Custom);
336 // Use the default implementation.
337 setOperationAction(ISD::VAARG , MVT::Other, Expand);
338 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
339 setOperationAction(ISD::VAEND , MVT::Other, Expand);
340 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
341 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
342 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
343 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
345 // Cell SPU has instructions for converting between i64 and fp.
346 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
347 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
349 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
350 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
352 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
353 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
355 // First set operation action for all vector types to expand. Then we
356 // will selectively turn on ones that can be effectively codegen'd.
357 addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
358 addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
359 addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
360 addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
361 addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
362 addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
364 // "Odd size" vector classes that we're willing to support:
365 addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);
367 for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
368 i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
369 MVT VT = (MVT::SimpleValueType)i;
371 // add/sub are legal for all supported vector VT's.
372 setOperationAction(ISD::ADD, VT, Legal);
373 setOperationAction(ISD::SUB, VT, Legal);
374 // mul is marked legal here; the cases that need special handling are dealt with during instruction selection.
375 setOperationAction(ISD::MUL, VT, Legal);
377 setOperationAction(ISD::AND, VT, Legal);
378 setOperationAction(ISD::OR, VT, Legal);
379 setOperationAction(ISD::XOR, VT, Legal);
380 setOperationAction(ISD::LOAD, VT, Legal);
381 setOperationAction(ISD::SELECT, VT, Legal);
382 setOperationAction(ISD::STORE, VT, Legal);
384 // These operations need to be expanded:
385 setOperationAction(ISD::SDIV, VT, Expand);
386 setOperationAction(ISD::SREM, VT, Expand);
387 setOperationAction(ISD::UDIV, VT, Expand);
388 setOperationAction(ISD::UREM, VT, Expand);
390 // Custom lower build_vector, constant pool spills, insert and
391 // extract vector elements:
392 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
393 setOperationAction(ISD::ConstantPool, VT, Custom);
394 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
395 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
396 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
397 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
400 setOperationAction(ISD::AND, MVT::v16i8, Custom);
401 setOperationAction(ISD::OR, MVT::v16i8, Custom);
402 setOperationAction(ISD::XOR, MVT::v16i8, Custom);
403 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
405 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
407 setShiftAmountType(MVT::i32);
408 setBooleanContents(ZeroOrNegativeOneBooleanContent);
410 setStackPointerRegisterToSaveRestore(SPU::R1);
412 // We have target-specific dag combine patterns for the following nodes:
413 setTargetDAGCombine(ISD::ADD);
414 setTargetDAGCombine(ISD::ZERO_EXTEND);
415 setTargetDAGCombine(ISD::SIGN_EXTEND);
416 setTargetDAGCombine(ISD::ANY_EXTEND);
418 computeRegisterProperties();
420 // Set pre-RA register scheduler default to BURR, which produces slightly
421 // better code than the default (could also be TDRR, but TargetLowering.h
422 // needs a mod to support that model):
423 setSchedulingPreference(SchedulingForRegPressure);
427 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
429 if (node_names.empty()) {
430 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
431 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
432 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
433 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
434 node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
435 node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
436 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
437 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
438 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
439 node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
440 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
441 node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
442 node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
443 node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
444 node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
445 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
446 node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
447 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
448 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
449 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
450 node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
451 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
452 "SPUISD::ROTBYTES_LEFT_BITS";
453 node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
454 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
455 node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
456 node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
457 node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
460 std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
462 return ((i != node_names.end()) ? i->second : 0);
465 //===----------------------------------------------------------------------===//
466 // Return the Cell SPU's SETCC result type
467 //===----------------------------------------------------------------------===//
469 MVT SPUTargetLowering::getSetCCResultType(MVT VT) const {
470 // i8, i16 and i32 are valid SETCC result types
471 return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ? VT : MVT::i32);
474 //===----------------------------------------------------------------------===//
475 // Calling convention code:
476 //===----------------------------------------------------------------------===//
478 #include "SPUGenCallingConv.inc"
480 //===----------------------------------------------------------------------===//
481 // LowerOperation implementation
482 //===----------------------------------------------------------------------===//
484 /// Custom lower loads for CellSPU
486 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
487 within a 16-byte block, we have to rotate to extract the requested element.
489 For extending loads, we also want to ensure that the following sequence is
490 emitted, e.g. for MVT::f32 extending load to MVT::f64:
493 %1 v16i8,ch = load
494 %2 v16i8,ch = rotate %1
495 %3 v4f32,ch = bitconvert %2
496 %4 f32 = vec2prefslot %3
497 %5 f64 = fp_extend %4
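 Illustrative example of the rotation step (a sketch with assumed values,
 following the rotamt computation in the code below): an i32 located at byte
 offset 8 of its 16-byte block, with a preferred slot starting at byte 0,
 gets rotamt = (8 & 0xf) - 0 = 8, i.e. the quadword is rotated left by 8
 bytes so the requested word lands in the preferred slot before
 VEC2PREFSLOT extracts it.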
501 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
502 LoadSDNode *LN = cast<LoadSDNode>(Op);
503 SDValue the_chain = LN->getChain();
504 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
505 MVT InVT = LN->getMemoryVT();
506 MVT OutVT = Op.getValueType();
507 ISD::LoadExtType ExtType = LN->getExtensionType();
508 unsigned alignment = LN->getAlignment();
509 const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
510 DebugLoc dl = Op.getDebugLoc();
512 switch (LN->getAddressingMode()) {
513 case ISD::UNINDEXED: {
515 SDValue basePtr = LN->getBasePtr();
518 if (alignment == 16) {
521 // Special cases for a known aligned load to simplify the base pointer
522 // and the rotation amount:
523 if (basePtr.getOpcode() == ISD::ADD
524 && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
525 // Known offset into basePtr
526 int64_t offset = CN->getSExtValue();
527 int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);
532 rotate = DAG.getConstant(rotamt, MVT::i16);
534 // Simplify the base pointer for this case:
535 basePtr = basePtr.getOperand(0);
536 if ((offset & ~0xf) > 0) {
537 basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
539 DAG.getConstant((offset & ~0xf), PtrVT));
541 } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
542 || (basePtr.getOpcode() == SPUISD::IndirectAddr
543 && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
544 && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
545 // Plain aligned a-form address: rotate into preferred slot
546 // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
547 int64_t rotamt = -vtm->prefslot_byte;
550 rotate = DAG.getConstant(rotamt, MVT::i16);
552 // Offset the rotate amount by the basePtr and the preferred slot byte offset
554 int64_t rotamt = -vtm->prefslot_byte;
557 rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
559 DAG.getConstant(rotamt, PtrVT));
562 // Unaligned load: must be more pessimistic about addressing modes:
563 if (basePtr.getOpcode() == ISD::ADD) {
564 MachineFunction &MF = DAG.getMachineFunction();
565 MachineRegisterInfo &RegInfo = MF.getRegInfo();
566 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
569 SDValue Op0 = basePtr.getOperand(0);
570 SDValue Op1 = basePtr.getOperand(1);
572 if (isa<ConstantSDNode>(Op1)) {
573 // Convert the (add <ptr>, <const>) to an indirect address contained
574 // in a register. Note that this is done because we need to avoid
575 // creating a 0(reg) d-form address due to the SPU's block loads.
576 basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
577 the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
578 basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
580 // Convert the (add <arg1>, <arg2>) to an indirect address, which
581 // will likely be lowered as a reg(reg) x-form address.
582 basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
585 basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
587 DAG.getConstant(0, PtrVT));
590 // Offset the rotate amount by the basePtr and the preferred slot byte offset
592 rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
594 DAG.getConstant(-vtm->prefslot_byte, PtrVT));
597 // Re-emit as a v16i8 vector load
598 result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
599 LN->getSrcValue(), LN->getSrcValueOffset(),
600 LN->isVolatile(), 16);
603 the_chain = result.getValue(1);
605 // Rotate into the preferred slot:
606 result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8,
607 result.getValue(0), rotate);
609 // Convert the loaded v16i8 vector to the appropriate vector type
610 // specified by the operand:
611 MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
612 result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
613 DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));
615 // Handle extending loads by extending the scalar result:
616 if (ExtType == ISD::SEXTLOAD) {
617 result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
618 } else if (ExtType == ISD::ZEXTLOAD) {
619 result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
620 } else if (ExtType == ISD::EXTLOAD) {
621 unsigned NewOpc = ISD::ANY_EXTEND;
623 if (OutVT.isFloatingPoint())
624 NewOpc = ISD::FP_EXTEND;
626 result = DAG.getNode(NewOpc, dl, OutVT, result);
629 SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
630 SDValue retops[2] = {
635 result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
636 retops, sizeof(retops) / sizeof(retops[0]));
643 case ISD::LAST_INDEXED_MODE:
644 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
645 "UNINDEXED\n";
646 cerr << (unsigned) LN->getAddressingMode() << "\n";
654 /// Custom lower stores for CellSPU
656 All CellSPU stores are aligned to 16-byte boundaries, so for elements
657 within a 16-byte block, we have to generate a shuffle to insert the
658 requested element into its place, then store the resulting block.
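 Roughly, the sequence generated below looks like (a sketch, not a literal
 DAG dump):

   %block v16i8,ch = load <16-byte chunk containing the target element>
   %mask          = SHUFFLE_MASK <insertion offset>
   %ins           = SHUFB (scalar_to_vector %value), %block, %mask
   store %ins -> <same 16-byte chunk>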
661 LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
662 StoreSDNode *SN = cast<StoreSDNode>(Op);
663 SDValue Value = SN->getValue();
664 MVT VT = Value.getValueType();
665 MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
666 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
667 DebugLoc dl = Op.getDebugLoc();
668 unsigned alignment = SN->getAlignment();
670 switch (SN->getAddressingMode()) {
671 case ISD::UNINDEXED: {
672 // The vector type we really want to load from the 16-byte chunk.
673 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
674 stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
676 SDValue alignLoadVec;
677 SDValue basePtr = SN->getBasePtr();
678 SDValue the_chain = SN->getChain();
679 SDValue insertEltOffs;
681 if (alignment == 16) {
684 // Special cases for a known aligned load to simplify the base pointer
685 // and insertion byte:
686 if (basePtr.getOpcode() == ISD::ADD
687 && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
688 // Known offset into basePtr
689 int64_t offset = CN->getSExtValue();
691 // Simplify the base pointer for this case:
692 basePtr = basePtr.getOperand(0);
693 insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
695 DAG.getConstant((offset & 0xf), PtrVT));
697 if ((offset & ~0xf) > 0) {
698 basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
700 DAG.getConstant((offset & ~0xf), PtrVT));
703 // Otherwise, assume it's at byte 0 of basePtr
704 insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
706 DAG.getConstant(0, PtrVT));
709 // Unaligned load: must be more pessimistic about addressing modes:
710 if (basePtr.getOpcode() == ISD::ADD) {
711 MachineFunction &MF = DAG.getMachineFunction();
712 MachineRegisterInfo &RegInfo = MF.getRegInfo();
713 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
716 SDValue Op0 = basePtr.getOperand(0);
717 SDValue Op1 = basePtr.getOperand(1);
719 if (isa<ConstantSDNode>(Op1)) {
720 // Convert the (add <ptr>, <const>) to an indirect address contained
721 // in a register. Note that this is done because we need to avoid
722 // creating a 0(reg) d-form address due to the SPU's block loads.
723 basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
724 the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
725 basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
727 // Convert the (add <arg1>, <arg2>) to an indirect address, which
728 // will likely be lowered as a reg(reg) x-form address.
729 basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
732 basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
734 DAG.getConstant(0, PtrVT));
737 // Insertion point is solely determined by basePtr's contents
738 insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
740 DAG.getConstant(0, PtrVT));
743 // Re-emit as a v16i8 vector load
744 alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
745 SN->getSrcValue(), SN->getSrcValueOffset(),
746 SN->isVolatile(), 16);
749 the_chain = alignLoadVec.getValue(1);
751 LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
752 SDValue theValue = SN->getValue();
756 && (theValue.getOpcode() == ISD::AssertZext
757 || theValue.getOpcode() == ISD::AssertSext)) {
758 // Drill down and get the value for zero- and sign-extended quantities
760 theValue = theValue.getOperand(0);
763 // If the base pointer is already a D-form address, then just create
764 // a new D-form address with a slot offset and the original base pointer.
765 // Otherwise generate a D-form address with the slot offset relative
766 // to the stack pointer, which is always aligned.
768 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
769 cerr << "CellSPU LowerSTORE: basePtr = ";
770 basePtr.getNode()->dump(&DAG);
775 SDValue insertEltOp =
776 DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
777 SDValue vectorizeOp =
778 DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);
780 result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
781 vectorizeOp, alignLoadVec,
782 DAG.getNode(ISD::BIT_CONVERT, dl,
783 MVT::v4i32, insertEltOp));
785 result = DAG.getStore(the_chain, dl, result, basePtr,
786 LN->getSrcValue(), LN->getSrcValueOffset(),
787 LN->isVolatile(), LN->getAlignment());
789 #if 0 && !defined(NDEBUG)
790 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
791 const SDValue &currentRoot = DAG.getRoot();
794 cerr << "------- CellSPU:LowerStore result:\n";
797 DAG.setRoot(currentRoot);
808 case ISD::LAST_INDEXED_MODE:
809 cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
810 "UNINDEXED\n";
811 cerr << (unsigned) SN->getAddressingMode() << "\n";
819 //! Generate the address of a constant pool entry.
821 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
822 MVT PtrVT = Op.getValueType();
823 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
824 Constant *C = CP->getConstVal();
825 SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
826 SDValue Zero = DAG.getConstant(0, PtrVT);
827 const TargetMachine &TM = DAG.getTarget();
828 // FIXME there is no actual debug info here
829 DebugLoc dl = Op.getDebugLoc();
831 if (TM.getRelocationModel() == Reloc::Static) {
832 if (!ST->usingLargeMem()) {
833 // Just return the SDValue with the constant pool address in it.
834 return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
836 SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
837 SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
838 return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
843 "LowerConstantPool: Relocation model other than static"
848 //! Alternate entry point for generating the address of a constant pool entry
850 SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) {
851 return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
855 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
856 MVT PtrVT = Op.getValueType();
857 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
858 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
859 SDValue Zero = DAG.getConstant(0, PtrVT);
860 const TargetMachine &TM = DAG.getTarget();
861 // FIXME there is no actual debug info here
862 DebugLoc dl = Op.getDebugLoc();
864 if (TM.getRelocationModel() == Reloc::Static) {
865 if (!ST->usingLargeMem()) {
866 return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
868 SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
869 SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
870 return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
875 "LowerJumpTable: Relocation model other than static not supported.");
880 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
881 MVT PtrVT = Op.getValueType();
882 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
883 GlobalValue *GV = GSDN->getGlobal();
884 SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
885 const TargetMachine &TM = DAG.getTarget();
886 SDValue Zero = DAG.getConstant(0, PtrVT);
887 // FIXME there is no actual debug info here
888 DebugLoc dl = Op.getDebugLoc();
890 if (TM.getRelocationModel() == Reloc::Static) {
891 if (!ST->usingLargeMem()) {
892 return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
894 SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
895 SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
896 return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
899 cerr << "LowerGlobalAddress: Relocation model other than static not "
900 "supported.\n";
908 //! Custom lower double precision floating point constants
910 LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
911 MVT VT = Op.getValueType();
912 // FIXME there is no actual debug info here
913 DebugLoc dl = Op.getDebugLoc();
915 if (VT == MVT::f64) {
916 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
919 "LowerConstantFP: Node is not ConstantFPSDNode");
921 uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
922 SDValue T = DAG.getConstant(dbits, MVT::i64);
923 SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
924 return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
925 DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Tvec));
932 LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
934 MachineFunction &MF = DAG.getMachineFunction();
935 MachineFrameInfo *MFI = MF.getFrameInfo();
936 MachineRegisterInfo &RegInfo = MF.getRegInfo();
937 SmallVector<SDValue, 48> ArgValues;
938 SDValue Root = Op.getOperand(0);
939 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
940 DebugLoc dl = Op.getDebugLoc();
942 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
943 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
945 unsigned ArgOffset = SPUFrameInfo::minStackSize();
946 unsigned ArgRegIdx = 0;
947 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
949 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
951 // Add DAG nodes to load the arguments or copy them out of registers.
952 for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
953 ArgNo != e; ++ArgNo) {
954 MVT ObjectVT = Op.getValue(ArgNo).getValueType();
955 unsigned ObjSize = ObjectVT.getSizeInBits()/8;
958 if (ArgRegIdx < NumArgRegs) {
959 const TargetRegisterClass *ArgRegClass;
961 switch (ObjectVT.getSimpleVT()) {
963 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
964 << ObjectVT.getMVTString()
969 ArgRegClass = &SPU::R8CRegClass;
972 ArgRegClass = &SPU::R16CRegClass;
975 ArgRegClass = &SPU::R32CRegClass;
978 ArgRegClass = &SPU::R64CRegClass;
981 ArgRegClass = &SPU::GPRCRegClass;
984 ArgRegClass = &SPU::R32FPRegClass;
987 ArgRegClass = &SPU::R64FPRegClass;
995 ArgRegClass = &SPU::VECREGRegClass;
999 unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
1000 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1001 ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT);
1004 // We need to load the argument to a virtual register if we determined
1005 // above that we ran out of physical registers of the appropriate type
1006 // or we're forced to do vararg
1007 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1008 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1009 ArgVal = DAG.getLoad(ObjectVT, dl, Root, FIN, NULL, 0);
1010 ArgOffset += StackSlotSize;
1013 ArgValues.push_back(ArgVal);
1015 Root = ArgVal.getOperand(0);
1020 // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
1021 // We will spill (79-3)+1 registers to the stack
1022 SmallVector<SDValue, 79-3+1> MemOps;
1024 // Create the frame slot
1026 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1027 VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
1028 SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1029 SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
1030 SDValue Store = DAG.getStore(Root, dl, ArgVal, FIN, NULL, 0);
1031 Root = Store.getOperand(0);
1032 MemOps.push_back(Store);
1034 // Increment address by stack slot size for the next stored argument
1035 ArgOffset += StackSlotSize;
1037 if (!MemOps.empty())
1038 Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1039 &MemOps[0], MemOps.size());
1042 ArgValues.push_back(Root);
1044 // Return the new list of results.
1045 return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
1046 &ArgValues[0], ArgValues.size());
1049 /// isLSAAddress - Return the immediate to use if the specified
1050 /// value is representable as an LSA address.
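// (An LSA address here is an absolute local-store address: the low two bits
// must be zero and the value must fit in a signed 18-bit immediate, which is
// what the checks below enforce; the constant returned is the word address.)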
1051 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1052 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1053 if (!C) return 0;
1055 int Addr = C->getZExtValue();
1056 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1057 (Addr << 14 >> 14) != Addr)
1058 return 0; // Top 14 bits have to be sext of immediate.
1060 return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
1064 LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1065 CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
1066 SDValue Chain = TheCall->getChain();
1067 SDValue Callee = TheCall->getCallee();
1068 unsigned NumOps = TheCall->getNumArgs();
1069 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1070 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1071 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1072 DebugLoc dl = TheCall->getDebugLoc();
1074 // Handy pointer type
1075 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1077 // Accumulate how many bytes are to be pushed on the stack, including the
1078 // linkage area, and parameter passing area. According to the SPU ABI,
1079 // we minimally need space for [LR] and [SP]
1080 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1082 // Set up a copy of the stack pointer for use loading and storing any
1083 // arguments that may not fit in the registers available for argument passing.
1085 SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1087 // Figure out which arguments are going to go in registers, and which in memory.
1089 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1090 unsigned ArgRegIdx = 0;
1092 // Keep track of registers passing arguments
1093 std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1094 // And the arguments passed on the stack
1095 SmallVector<SDValue, 8> MemOpChains;
1097 for (unsigned i = 0; i != NumOps; ++i) {
1098 SDValue Arg = TheCall->getArg(i);
1100 // PtrOff will be used to store the current argument to the stack if a
1101 // register cannot be found for it.
1102 SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1103 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
1105 switch (Arg.getValueType().getSimpleVT()) {
1106 default: assert(0 && "Unexpected ValueType for argument!");
1112 if (ArgRegIdx != NumArgRegs) {
1113 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1115 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
1116 ArgOffset += StackSlotSize;
1121 if (ArgRegIdx != NumArgRegs) {
1122 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1124 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
1125 ArgOffset += StackSlotSize;
1134 if (ArgRegIdx != NumArgRegs) {
1135 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1137 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
1138 ArgOffset += StackSlotSize;
1144 // Update number of stack bytes actually used, insert a call sequence start
1145 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1146 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
1149 if (!MemOpChains.empty()) {
1150 // Adjust the stack pointer for the stack arguments.
1151 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1152 &MemOpChains[0], MemOpChains.size());
1155 // Build a sequence of copy-to-reg nodes chained together with token chain
1156 // and flag operands which copy the outgoing args into the appropriate regs.
1157 SDValue InFlag;
1158 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1159 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1160 RegsToPass[i].second, InFlag);
1161 InFlag = Chain.getValue(1);
1164 SmallVector<SDValue, 8> Ops;
1165 unsigned CallOpc = SPUISD::CALL;
1167 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1168 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1169 // node so that legalize doesn't hack it.
1170 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1171 GlobalValue *GV = G->getGlobal();
1172 MVT CalleeVT = Callee.getValueType();
1173 SDValue Zero = DAG.getConstant(0, PtrVT);
1174 SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1176 if (!ST->usingLargeMem()) {
1177 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1178 // style calls, otherwise, external symbols are BRASL calls. This assumes
1179 // that declared/defined symbols are in the same compilation unit and can
1180 // be reached through PC-relative jumps.
1183 // This may be an unsafe assumption for JIT and really large compilation units.
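// (BRSL is a PC-relative branch-and-set-link, while BRASL branches to an
// absolute A-form address, so only symbols assumed to be in the same
// compilation unit get the PC-relative form.)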
1185 if (GV->isDeclaration()) {
1186 Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
1188 Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
1191 // "Large memory" mode: Turn all calls into indirect calls with a X-form
1193 Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
1195 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1196 MVT CalleeVT = Callee.getValueType();
1197 SDValue Zero = DAG.getConstant(0, PtrVT);
1198 SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
1199 Callee.getValueType());
1201 if (!ST->usingLargeMem()) {
1202 Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
1204 Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
1206 } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1207 // If this is an absolute destination address that appears to be a legal
1208 // local store address, use the munged value.
1209 Callee = SDValue(Dest, 0);
1212 Ops.push_back(Chain);
1213 Ops.push_back(Callee);
1215 // Add argument registers to the end of the list so that they are known live into the call.
1217 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1218 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1219 RegsToPass[i].second.getValueType()));
1221 if (InFlag.getNode())
1222 Ops.push_back(InFlag);
1223 // Returns a chain and a flag for retval copy to use.
1224 Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
1225 &Ops[0], Ops.size());
1226 InFlag = Chain.getValue(1);
1228 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
1229 DAG.getIntPtrConstant(0, true), InFlag);
1230 if (TheCall->getValueType(0) != MVT::Other)
1231 InFlag = Chain.getValue(1);
1233 SDValue ResultVals[3];
1234 unsigned NumResults = 0;
1236 // If the call has results, copy the values out of the ret val registers.
1237 switch (TheCall->getValueType(0).getSimpleVT()) {
1238 default: assert(0 && "Unexpected ret value!");
1239 case MVT::Other: break;
1241 if (TheCall->getValueType(1) == MVT::i32) {
1242 Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
1243 MVT::i32, InFlag).getValue(1);
1244 ResultVals[0] = Chain.getValue(0);
1245 Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
1246 Chain.getValue(2)).getValue(1);
1247 ResultVals[1] = Chain.getValue(0);
1250 Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
1251 InFlag).getValue(1);
1252 ResultVals[0] = Chain.getValue(0);
1257 Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i64,
1258 InFlag).getValue(1);
1259 ResultVals[0] = Chain.getValue(0);
1263 Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i128,
1264 InFlag).getValue(1);
1265 ResultVals[0] = Chain.getValue(0);
1270 Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, TheCall->getValueType(0),
1271 InFlag).getValue(1);
1272 ResultVals[0] = Chain.getValue(0);
1281 Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, TheCall->getValueType(0),
1282 InFlag).getValue(1);
1283 ResultVals[0] = Chain.getValue(0);
1288 // If the function returns void, just return the chain.
1289 if (NumResults == 0)
1290 return Chain;
1292 // Otherwise, merge everything together with a MERGE_VALUES node.
1293 ResultVals[NumResults++] = Chain;
1294 SDValue Res = DAG.getMergeValues(ResultVals, NumResults, dl);
1295 return Res.getValue(Op.getResNo());
1299 LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
1300 SmallVector<CCValAssign, 16> RVLocs;
1301 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1302 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1303 DebugLoc dl = Op.getDebugLoc();
1304 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1305 CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);
1307 // If this is the first return lowered for this function, add the regs to the
1308 // liveout set for the function.
1309 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1310 for (unsigned i = 0; i != RVLocs.size(); ++i)
1311 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1314 SDValue Chain = Op.getOperand(0);
1315 SDValue Flag;
1317 // Copy the result values into the output registers.
1318 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1319 CCValAssign &VA = RVLocs[i];
1320 assert(VA.isRegLoc() && "Can only return in registers!");
1321 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
1322 Op.getOperand(i*2+1), Flag);
1323 Flag = Chain.getValue(1);
1324 }
1326 if (Flag.getNode())
1327 return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
1328 else
1329 return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
1333 //===----------------------------------------------------------------------===//
1334 // Vector related lowering:
1335 //===----------------------------------------------------------------------===//
1337 static ConstantSDNode *
1338 getVecImm(SDNode *N) {
1339 SDValue OpVal(0, 0);
1341 // Check to see if this buildvec has a single non-undef value in its elements.
1342 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1343 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1344 if (OpVal.getNode() == 0)
1345 OpVal = N->getOperand(i);
1346 else if (OpVal != N->getOperand(i))
1350 if (OpVal.getNode() != 0) {
1351 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1356 return 0; // All UNDEF: use implicit def.; not Constant node
1359 /// get_vec_u18imm - Test if this vector is a vector filled with the same value
1360 /// and the value fits into an unsigned 18-bit constant, and if so, return the
1361 /// constant.
1362 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1364 if (ConstantSDNode *CN = getVecImm(N)) {
1365 uint64_t Value = CN->getZExtValue();
1366 if (ValueType == MVT::i64) {
1367 uint64_t UValue = CN->getZExtValue();
1368 uint32_t upper = uint32_t(UValue >> 32);
1369 uint32_t lower = uint32_t(UValue);
1372 Value = Value >> 32;
1374 if (Value <= 0x3ffff)
1375 return DAG.getTargetConstant(Value, ValueType);
1381 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1382 /// and the value fits into a signed 16-bit constant, and if so, return the
1383 /// constant.
1384 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1386 if (ConstantSDNode *CN = getVecImm(N)) {
1387 int64_t Value = CN->getSExtValue();
1388 if (ValueType == MVT::i64) {
1389 uint64_t UValue = CN->getZExtValue();
1390 uint32_t upper = uint32_t(UValue >> 32);
1391 uint32_t lower = uint32_t(UValue);
1394 Value = Value >> 32;
1396 if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1397 return DAG.getTargetConstant(Value, ValueType);
1404 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1405 /// and the value fits into a signed 10-bit constant, and if so, return the
1406 /// constant.
1407 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1409 if (ConstantSDNode *CN = getVecImm(N)) {
1410 int64_t Value = CN->getSExtValue();
1411 if (ValueType == MVT::i64) {
1412 uint64_t UValue = CN->getZExtValue();
1413 uint32_t upper = uint32_t(UValue >> 32);
1414 uint32_t lower = uint32_t(UValue);
1417 Value = Value >> 32;
1419 if (isS10Constant(Value))
1420 return DAG.getTargetConstant(Value, ValueType);
1426 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1427 /// and the value fits into a signed 8-bit constant, and if so, return the
1428 /// constant.
1430 /// @note: The incoming vector is v16i8 because that's the only way we can load
1431 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
1432 /// same value.
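/// For example (illustrative): a v16i8 splat of 0x5A reaches this point as the
/// i16 value 0x5A5A, so the upper and lower bytes match and 0x5A is returned;
/// an i16 splat of 0x1234 fails the byte check and no i8 immediate is formed.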
1433 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1435 if (ConstantSDNode *CN = getVecImm(N)) {
1436 int Value = (int) CN->getZExtValue();
1437 if (ValueType == MVT::i16
1438 && Value <= 0xffff /* truncated from uint64_t */
1439 && ((short) Value >> 8) == ((short) Value & 0xff))
1440 return DAG.getTargetConstant(Value & 0xff, ValueType);
1441 else if (ValueType == MVT::i8
1442 && (Value & 0xff) == Value)
1443 return DAG.getTargetConstant(Value, ValueType);
1449 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1450 /// and the value fits into a signed 16-bit constant, and if so, return the
1451 /// constant.
1452 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1454 if (ConstantSDNode *CN = getVecImm(N)) {
1455 uint64_t Value = CN->getZExtValue();
1456 if ((ValueType == MVT::i32
1457 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1458 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1459 return DAG.getTargetConstant(Value >> 16, ValueType);
1465 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1466 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1467 if (ConstantSDNode *CN = getVecImm(N)) {
1468 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
1474 /// get_v2i64_imm - Catch-all for general 64-bit constant vectors
1475 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1476 if (ConstantSDNode *CN = getVecImm(N)) {
1477 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
1483 // If this is a vector of constants or undefs, get the bits. A bit in
1484 // UndefBits is set if the corresponding element of the vector is an
1485 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1486 // zero. Return true if this is not an array of constants, false if it is.
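// Worked example (illustrative): for a v4i32 build_vector <1, undef, 1, 1>,
// element 0 lands in the high 32 bits of VectorBits[0], giving
// VectorBits = { 0x0000000100000000, 0x0000000100000001 } and
// UndefBits  = { 0x00000000ffffffff, 0 }.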
1488 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1489 uint64_t UndefBits[2]) {
1490 // Start with zero'd results.
1491 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1493 unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
1494 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1495 SDValue OpVal = BV->getOperand(i);
1497 unsigned PartNo = i >= e/2; // In the upper 64 bits (second half of the 128-bit vector)?
1498 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1500 uint64_t EltBits = 0;
1501 if (OpVal.getOpcode() == ISD::UNDEF) {
1502 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1503 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1505 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1506 EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
1507 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1508 const APFloat &apf = CN->getValueAPF();
1509 EltBits = (CN->getValueType(0) == MVT::f32
1510 ? FloatToBits(apf.convertToFloat())
1511 : DoubleToBits(apf.convertToDouble()));
1513 // Nonconstant element.
1517 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1520 //printf("%llx %llx %llx %llx\n",
1521 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1525 /// If this is a splat (repetition) of a value across the whole vector, return
1526 /// the smallest size that splats it. For example, "0x01010101010101..." is a
1527 /// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1528 /// SplatSize = 1 byte.
1529 static bool isConstantSplat(const uint64_t Bits128[2],
1530 const uint64_t Undef128[2],
1531 unsigned MinSplatBits,
1532 uint64_t &SplatBits, uint64_t &SplatUndef,
1533 int &SplatSize) {
1534 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1535 // the same as the lower 64-bits, ignoring undefs.
1536 uint64_t Bits64 = Bits128[0] | Bits128[1];
1537 uint64_t Undef64 = Undef128[0] & Undef128[1];
1538 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1539 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1540 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1541 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
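// At each halving step above, value bits are OR'd together while undef bits
// are AND'd, so a bit only remains "undef" at a narrower width if both halves
// folded onto it were undef; defined bits always take precedence when the
// halves are compared below.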
1543 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1544 if (MinSplatBits < 64) {
1546 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1547 // undefs.
1548 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1549 if (MinSplatBits < 32) {
1551 // If the top 16-bits are different than the lower 16-bits, ignoring
1552 // undefs, we have an i32 splat.
1553 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1554 if (MinSplatBits < 16) {
1555 // If the top 8-bits are different than the lower 8-bits, ignoring
1556 // undefs, we have an i16 splat.
1557 if ((Bits16 & (uint16_t(~Undef16) >> 8))
1558 == ((Bits16 >> 8) & ~Undef16)) {
1559 // Otherwise, we have an 8-bit splat.
1560 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1561 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
1567 SplatUndef = Undef16;
1574 SplatUndef = Undef32;
1580 SplatBits = Bits128[0];
1581 SplatUndef = Undef128[0];
1587 return false; // Can't be a splat if two pieces don't match.
1590 //! Lower a BUILD_VECTOR instruction creatively:
1592 LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1593 MVT VT = Op.getValueType();
1594 DebugLoc dl = Op.getDebugLoc();
1595 // If this is a vector of constants or undefs, get the bits. A bit in
1596 // UndefBits is set if the corresponding element of the vector is an
1597 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1599 uint64_t VectorBits[2];
1600 uint64_t UndefBits[2];
1601 uint64_t SplatBits, SplatUndef;
1602 int SplatSize;
1603 if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
1604 || !isConstantSplat(VectorBits, UndefBits,
1605 VT.getVectorElementType().getSizeInBits(),
1606 SplatBits, SplatUndef, SplatSize))
1607 return SDValue(); // Not a constant vector, not a splat.
1609 switch (VT.getSimpleVT()) {
1611 cerr << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = "
1612 << VT.getMVTString()
1617 uint32_t Value32 = uint32_t(SplatBits);
1618 assert(SplatSize == 4
1619 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1620 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1621 SDValue T = DAG.getConstant(Value32, MVT::i32);
1622 return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,
1623 DAG.getNode(ISD::BUILD_VECTOR, dl,
1624 MVT::v4i32, T, T, T, T));
1628 uint64_t f64val = uint64_t(SplatBits);
1629 assert(SplatSize == 8
1630 && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
1631 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1632 SDValue T = DAG.getConstant(f64val, MVT::i64);
1633 return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64,
1634 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
1638 // 8-bit constants have to be expanded to 16-bits
1639 unsigned short Value16 = SplatBits | (SplatBits << 8);
1641 for (int i = 0; i < 8; ++i)
1642 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1643 return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
1644 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, Ops, 8));
1647 unsigned short Value16;
1649 Value16 = (unsigned short) (SplatBits & 0xffff);
1651 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1652 SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
1654 for (int i = 0; i < 8; ++i) Ops[i] = T;
1655 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops, 8);
1658 unsigned int Value = SplatBits;
1659 SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
1660 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
1663 unsigned int Value = SplatBits;
1664 SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
1665 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T);
1668 return SPU::LowerSplat_v2i64(VT, DAG, SplatBits, dl);
1676 SPU::LowerSplat_v2i64(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
1678 uint32_t upper = uint32_t(SplatVal >> 32);
1679 uint32_t lower = uint32_t(SplatVal);
1681 if (upper == lower) {
1682 // Magic constant that can be matched by IL, ILA, et al.
1683 SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
1684 return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1685 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1686 Val, Val, Val, Val));
1690 SmallVector<SDValue, 16> ShufBytes;
1692 bool upper_special, lower_special;
1694 // NOTE: This code creates common-case shuffle masks that can be easily
1695 // detected as common expressions. It is not attempting to create highly
1696 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
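// The byte values used further down rely on SHUFB's special control-byte
// encodings (an assumption from the SPU ISA): a control byte of the form
// 10xxxxxx selects 0x00, 110xxxxx selects 0xFF, and 111xxxxx selects 0x80.
// That is why, for example, the 0x80000000 case below emits 0xe0 for the first
// byte of the word and 0x80 for the remaining bytes.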
1698 // Detect if the upper or lower half is a special shuffle mask pattern:
1699 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1700 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1702 // Create lower vector if not a special pattern
1703 if (!lower_special) {
1704 SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1705 LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1706 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1707 LO32C, LO32C, LO32C, LO32C));
1710 // Create upper vector if not a special pattern
1711 if (!upper_special) {
1712 SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1713 HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1714 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1715 HI32C, HI32C, HI32C, HI32C));
1718 // If either upper or lower are special, then the two input operands are
1719 // the same (basically, one of them is a "don't care")
1724 if (lower_special && upper_special) {
1725 // Unhappy situation... both upper and lower are special, so punt with
1726 // a target constant:
1727 SDValue Zero = DAG.getConstant(0, MVT::i32);
1728 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Zero, Zero,
1732 for (int i = 0; i < 4; ++i) {
1734 for (int j = 0; j < 4; ++j) {
1736 bool process_upper, process_lower;
1738 process_upper = (upper_special && (i & 1) == 0);
1739 process_lower = (lower_special && (i & 1) == 1);
1741 if (process_upper || process_lower) {
1742 if ((process_upper && upper == 0)
1743 || (process_lower && lower == 0))
1745 else if ((process_upper && upper == 0xffffffff)
1746 || (process_lower && lower == 0xffffffff))
1748 else if ((process_upper && upper == 0x80000000)
1749 || (process_lower && lower == 0x80000000))
1750 val |= (j == 0 ? 0xe0 : 0x80);
1752 val |= i * 4 + j + ((i & 1) * 16);
1755 ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1758 return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
1759 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1760 &ShufBytes[0], ShufBytes.size()));
1764 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1765 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1766 /// permutation vector, V3, is monotonically increasing with one "exception"
1767 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1768 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1769 /// In either case, the net result is going to eventually invoke SHUFB to
1770 /// permute/shuffle the bytes from V1 and V2.
1772 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, which
1773 /// generate the control word for byte/halfword/word insertion. This handles a
1774 /// single element move from V2 into V1.
1776 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
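///
/// For example, with v4i32 operands the mask (0, 1, 6, 3) fits the
/// (0, 1, _, 3) pattern above: the single exception element (index 6, i.e.
/// element 2 of V2) is inserted into slot 2 of V1 while the remaining
/// elements stay in order, so one SHUFFLE_MASK/SHUFB pair suffices.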
1777 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1778 SDValue V1 = Op.getOperand(0);
1779 SDValue V2 = Op.getOperand(1);
1780 SDValue PermMask = Op.getOperand(2);
1781 DebugLoc dl = Op.getDebugLoc();
1783 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1785 // If we have a single element being moved from V1 to V2, this can be handled
1786 // using the C*[DX] compute mask instructions, but the vector elements have
1787 // to be monotonically increasing with one exception element.
1788 MVT VecVT = V1.getValueType();
1789 MVT EltVT = VecVT.getVectorElementType();
1790 unsigned EltsFromV2 = 0;
1792 unsigned V2EltIdx0 = 0;
1793 unsigned CurrElt = 0;
1794 unsigned MaxElts = VecVT.getVectorNumElements();
1795 unsigned PrevElt = 0;
1797 bool monotonic = true;
1800 if (EltVT == MVT::i8) {
1802 } else if (EltVT == MVT::i16) {
1804 } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
1806 } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
1809 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1811 for (unsigned i = 0; i != PermMask.getNumOperands(); ++i) {
1812 if (PermMask.getOperand(i).getOpcode() != ISD::UNDEF) {
1813       unsigned SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1816 if (SrcElt >= V2EltIdx0) {
1817       if (++EltsFromV2 <= 1) {
1818 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1820 } else if (CurrElt != SrcElt) {
1828 if (PrevElt > 0 && SrcElt < MaxElts) {
1829 if ((PrevElt == SrcElt - 1)
1830 || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
1837 } else if (PrevElt == 0) {
1838 // First time through, need to keep track of previous element
1841           // This isn't a rotation; it takes elements from vector 2
1848 if (EltsFromV2 == 1 && monotonic) {
1849 // Compute mask and shuffle
1850 MachineFunction &MF = DAG.getMachineFunction();
1851 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1852 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1853 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1854 // Initialize temporary register to 0
1855 SDValue InitTempReg =
1856 DAG.getCopyToReg(DAG.getEntryNode(), dl, VReg, DAG.getConstant(0, PtrVT));
1857 // Copy register's contents as index in SHUFFLE_MASK:
1858 SDValue ShufMaskOp =
1859 DAG.getNode(SPUISD::SHUFFLE_MASK, dl, MVT::v4i32,
1860 DAG.getTargetConstant(V2Elt, MVT::i32),
1861 DAG.getCopyFromReg(InitTempReg, dl, VReg, PtrVT));
1862 // Use shuffle mask in SHUFB synthetic instruction:
1863 return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
1865 } else if (rotate) {
1866 int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
1868 return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
1869 V1, DAG.getConstant(rotamt, MVT::i16));
1871 // Convert the SHUFFLE_VECTOR mask's input element units to the
1873 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
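  // For example, a v4i32 mask entry of 5 expands to byte indices 20..23
  // (5*4 + 0..3); shufb interprets control bytes 0x10..0x1f as bytes 0..15
  // of its second operand, so these select bytes 4..7 of V2.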
1875 SmallVector<SDValue, 16> ResultMask;
1876 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1878 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1881 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1883 for (unsigned j = 0; j < BytesPerElement; ++j) {
1884 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1889 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
1890 &ResultMask[0], ResultMask.size());
1891 return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
1895 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1896 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1897 DebugLoc dl = Op.getDebugLoc();
1899 if (Op0.getNode()->getOpcode() == ISD::Constant) {
1900 // For a constant, build the appropriate constant vector, which will
1901 // eventually simplify to a vector register load.
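    // (e.g., an i32 constant 1 lowered into a v4i32 becomes the splat
    // <1, 1, 1, 1>; only element 0 is required by SCALAR_TO_VECTOR, but the
    // splat is just as cheap to materialize.)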
1903 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1904 SmallVector<SDValue, 16> ConstVecValues;
1908 // Create a constant vector:
1909 switch (Op.getValueType().getSimpleVT()) {
1910 default: assert(0 && "Unexpected constant value type in "
1911 "LowerSCALAR_TO_VECTOR");
1912 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1913 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1914 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1915 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1916 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1917 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1920 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1921 for (size_t j = 0; j < n_copies; ++j)
1922 ConstVecValues.push_back(CValue);
1924 return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
1925 &ConstVecValues[0], ConstVecValues.size());
1927 // Otherwise, copy the value from one register to another:
1928 switch (Op0.getValueType().getSimpleVT()) {
1929 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1936 return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
1943 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
1944 MVT VT = Op.getValueType();
1945 SDValue N = Op.getOperand(0);
1946 SDValue Elt = Op.getOperand(1);
1947 DebugLoc dl = Op.getDebugLoc();
1950 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
1951 // Constant argument:
1952 int EltNo = (int) C->getZExtValue();
1955 if (VT == MVT::i8 && EltNo >= 16)
1956 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
1957 else if (VT == MVT::i16 && EltNo >= 8)
1958 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
1959 else if (VT == MVT::i32 && EltNo >= 4)
1960       assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
1961 else if (VT == MVT::i64 && EltNo >= 2)
1962       assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");
1964 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
1965 // i32 and i64: Element 0 is the preferred slot
1966 return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
1969 // Need to generate shuffle mask and extract:
1970 int prefslot_begin = -1, prefslot_end = -1;
1971 int elt_byte = EltNo * VT.getSizeInBits() / 8;
1973 switch (VT.getSimpleVT()) {
1975 assert(false && "Invalid value type!");
1977 prefslot_begin = prefslot_end = 3;
1981 prefslot_begin = 2; prefslot_end = 3;
1986 prefslot_begin = 0; prefslot_end = 3;
1991 prefslot_begin = 0; prefslot_end = 7;
1996 assert(prefslot_begin != -1 && prefslot_end != -1 &&
1997 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
1999 unsigned int ShufBytes[16];
2000 for (int i = 0; i < 16; ++i) {
2001       // zero fill upper part of preferred slot, don't care about the
2003 unsigned int mask_val;
2004 if (i <= prefslot_end) {
2006 ((i < prefslot_begin)
2008 : elt_byte + (i - prefslot_begin));
2010 ShufBytes[i] = mask_val;
2012 ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
2015 SDValue ShufMask[4];
2016 for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
2017 unsigned bidx = i * 4;
2018 unsigned int bits = ((ShufBytes[bidx] << 24) |
2019 (ShufBytes[bidx+1] << 16) |
2020 (ShufBytes[bidx+2] << 8) |
2022 ShufMask[i] = DAG.getConstant(bits, MVT::i32);
2025 SDValue ShufMaskVec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2027 sizeof(ShufMask) / sizeof(ShufMask[0]));
2029 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
2030 DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
2031 N, N, ShufMaskVec));
2033 // Variable index: Rotate the requested element into slot 0, then replicate
2034 // slot 0 across the vector
2035 MVT VecVT = N.getValueType();
2036 if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
2037 cerr << "LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit vector type!\n";
2041 // Make life easier by making sure the index is zero-extended to i32
2042 if (Elt.getValueType() != MVT::i32)
2043 Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);
2045 // Scale the index to a bit/byte shift quantity
2047 APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
2048 unsigned scaleShift = scaleFactor.logBase2();
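    // e.g., for a v8i16 source the scale factor is 16/8 = 2 (scaleShift = 1):
    // element index 5 becomes byte offset 10, SHLQUAD_L_BYTES shifts the
    // quadword left by that many bytes so the requested element lands at
    // byte 0, and the replicate mask below copies it into the preferred slot.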
2051 if (scaleShift > 0) {
2052 // Scale the shift factor:
2053 Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
2054 DAG.getConstant(scaleShift, MVT::i32));
2057 vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt);
2059 // Replicate the bytes starting at byte 0 across the entire vector (for
2060 // consistency with the notion of a unified register set)
2063 switch (VT.getSimpleVT()) {
2065       cerr << "LowerEXTRACT_VECTOR_ELT(variable): Unhandled vector type\n";
2069 SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
2070 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, factor, factor,
2075 SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2076 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, factor, factor,
2082 SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2083 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, factor, factor,
2089 SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2090 SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2091 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2092 loFactor, hiFactor, loFactor, hiFactor);
2097 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
2098 DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2099 vecShift, vecShift, replicate));
2105 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2106 SDValue VecOp = Op.getOperand(0);
2107 SDValue ValOp = Op.getOperand(1);
2108 SDValue IdxOp = Op.getOperand(2);
2109 DebugLoc dl = Op.getDebugLoc();
2110 MVT VT = Op.getValueType();
2112 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2113 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2115 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2116 // Use $sp ($1) because it's always 16-byte aligned and it's available:
2117 SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
2118 DAG.getRegister(SPU::R1, PtrVT),
2119 DAG.getConstant(CN->getSExtValue(), PtrVT));
2120 SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer);
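  // The generated control word is an identity pattern that keeps every byte
  // of VecOp (the second SHUFB operand) except at the addressed element,
  // where it selects the bytes of the freshly built scalar vector (the first
  // operand); this is the insertion mask the C*D instructions produce.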
2123 DAG.getNode(SPUISD::SHUFB, dl, VT,
2124 DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
2126 DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask));
2131 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
2132 const TargetLowering &TLI)
2134 SDValue N0 = Op.getOperand(0); // Everything has at least one operand
2135 DebugLoc dl = Op.getDebugLoc();
2136 MVT ShiftVT = TLI.getShiftAmountTy();
2138 assert(Op.getValueType() == MVT::i8);
2141 assert(0 && "Unhandled i8 math operator");
2145     // 8-bit addition: Promote the arguments up to 16 bits and truncate
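    // e.g., (add i8 %a, %b) becomes (trunc (add i16 (sext %a), (sext %b)));
    // the low 8 bits of the 16-bit sum equal the 8-bit sum regardless of how
    // the operands were extended.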
2147 SDValue N1 = Op.getOperand(1);
2148 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2149 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2150 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2151 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2156     // 8-bit subtraction: Promote the arguments up to 16 bits and truncate
2158 SDValue N1 = Op.getOperand(1);
2159 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2160 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2161 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2162 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2166 SDValue N1 = Op.getOperand(1);
2168 N0 = (N0.getOpcode() != ISD::Constant
2169 ? DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0)
2170 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2172 N1Opc = N1.getValueType().bitsLT(ShiftVT)
2175 N1 = (N1.getOpcode() != ISD::Constant
2176 ? DAG.getNode(N1Opc, dl, ShiftVT, N1)
2177 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2178 TLI.getShiftAmountTy()));
2180 DAG.getNode(ISD::OR, dl, MVT::i16, N0,
2181 DAG.getNode(ISD::SHL, dl, MVT::i16,
2182 N0, DAG.getConstant(8, MVT::i32)));
2183 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2184 DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
2188 SDValue N1 = Op.getOperand(1);
2190 N0 = (N0.getOpcode() != ISD::Constant
2191 ? DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0)
2192 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2194 N1Opc = N1.getValueType().bitsLT(ShiftVT)
2197 N1 = (N1.getOpcode() != ISD::Constant
2198 ? DAG.getNode(N1Opc, dl, ShiftVT, N1)
2199 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(), ShiftVT));
2200 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2201 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2204 SDValue N1 = Op.getOperand(1);
2206 N0 = (N0.getOpcode() != ISD::Constant
2207 ? DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0)
2208 : DAG.getConstant(cast<ConstantSDNode>(N0)->getSExtValue(),
2210 N1Opc = N1.getValueType().bitsLT(ShiftVT)
2213 N1 = (N1.getOpcode() != ISD::Constant
2214 ? DAG.getNode(N1Opc, dl, ShiftVT, N1)
2215 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2217 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2218 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2221 SDValue N1 = Op.getOperand(1);
2223 N0 = (N0.getOpcode() != ISD::Constant
2224 ? DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0)
2225 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2227 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2228 N1 = (N1.getOpcode() != ISD::Constant
2229 ? DAG.getNode(N1Opc, dl, MVT::i16, N1)
2230 : DAG.getConstant(cast<ConstantSDNode>(N1)->getSExtValue(),
2232 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2233 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2241 //! Generate the carry-generate shuffle mask.
2242 SDValue SPU::getCarryGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) {
2243   SmallVector<SDValue, 16> ShufBytes;
2245   // Create the shuffle mask for "rotating" the carry up one register slot
2246   // once the carry is generated.
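  // Control words 0x04050607 and 0x0c0d0e0f copy words 1 and 3 of the carry
  // vector into words 0 and 2 of the result, while 0x80808080 control bytes
  // produce zeros; this rotates each doubleword's low-word carry up into the
  // high-word slot so the 64-bit add can consume it. (The borrow variant
  // below uses 0xc0c0c0c0, which produces 0xff bytes instead.)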
2247 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2248 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2249 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2250 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2252 return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2253 &ShufBytes[0], ShufBytes.size());
2256 //! Generate the borrow-generate shuffle mask
2257 SDValue SPU::getBorrowGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) {
2258   SmallVector<SDValue, 16> ShufBytes;
2260 // Create the shuffle mask for "rotating" the borrow up one register slot
2261 // once the borrow is generated.
2262 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2263 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2264 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2265 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2267 return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2268 &ShufBytes[0], ShufBytes.size());
2271 //! Lower byte immediate operations for v16i8 vectors:
2273 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2276 MVT VT = Op.getValueType();
2277 DebugLoc dl = Op.getDebugLoc();
2279 ConstVec = Op.getOperand(0);
2280 Arg = Op.getOperand(1);
2281 if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2282 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2283 ConstVec = ConstVec.getOperand(0);
2285 ConstVec = Op.getOperand(1);
2286 Arg = Op.getOperand(0);
2287 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2288 ConstVec = ConstVec.getOperand(0);
2293 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2294 uint64_t VectorBits[2];
2295 uint64_t UndefBits[2];
2296 uint64_t SplatBits, SplatUndef;
2299 if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
2300 && isConstantSplat(VectorBits, UndefBits,
2301 VT.getVectorElementType().getSizeInBits(),
2302 SplatBits, SplatUndef, SplatSize)) {
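      // e.g., (or v16i8 %a, (splat 0x0b)) is rebuilt with a BUILD_VECTOR of
      // i8 target constants so the byte-immediate patterns (ANDBI/ORBI/XORBI)
      // can match it directly instead of materializing the splat in a
      // register.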
2304 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2305 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2307 // Turn the BUILD_VECTOR into a set of target constants:
2308 for (size_t i = 0; i < tcVecSize; ++i)
2311 return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
2312 DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
2317 // These operations (AND, OR, XOR) are legal, they just couldn't be custom
2318 // lowered. Return the operation, rather than a null SDValue.
2322 //! Custom lowering for CTPOP (count population)
2324   Custom lowering code that counts the number of ones in the input
2325 operand. SPU has such an instruction, but it counts the number of
2326 ones per byte, which then have to be accumulated.
2328 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2329 MVT VT = Op.getValueType();
2330 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2331 DebugLoc dl = Op.getDebugLoc();
2333 switch (VT.getSimpleVT()) {
2335 assert(false && "Invalid value type!");
2337 SDValue N = Op.getOperand(0);
2338 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2340 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2341 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2343 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
2347 MachineFunction &MF = DAG.getMachineFunction();
2348 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2350 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2352 SDValue N = Op.getOperand(0);
2353 SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2354 SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2355 SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2357 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2358 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2360 // CNTB_result becomes the chain to which all of the virtual registers
2361 // CNTB_reg, SUM1_reg become associated:
2362 SDValue CNTB_result =
2363 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
2365 SDValue CNTB_rescopy =
2366 DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2368 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
2370 return DAG.getNode(ISD::AND, dl, MVT::i16,
2371 DAG.getNode(ISD::ADD, dl, MVT::i16,
2372 DAG.getNode(ISD::SRL, dl, MVT::i16,
2379 MachineFunction &MF = DAG.getMachineFunction();
2380 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2382 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2383 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2385 SDValue N = Op.getOperand(0);
2386 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2387 SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2388 SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2389 SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2391 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2392 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
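    // CNTB leaves a per-byte population count in each byte of the word. The
    // shift/add sequence below folds all four byte counts into the low byte:
    // adding (cnt >> 16) folds the two high byte counts onto the two low
    // ones, adding (sum >> 8) folds those into the low byte, and the final
    // mask with 0xff extracts the total (at most 32, so it fits in one byte).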
2394 // CNTB_result becomes the chain to which all of the virtual registers
2395 // CNTB_reg, SUM1_reg become associated:
2396 SDValue CNTB_result =
2397 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
2399 SDValue CNTB_rescopy =
2400 DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2403 DAG.getNode(ISD::SRL, dl, MVT::i32,
2404 DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
2408 DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
2409 DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
2411 SDValue Sum1_rescopy =
2412 DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
2415 DAG.getNode(ISD::SRL, dl, MVT::i32,
2416 DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
2419 DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
2420 DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
2422 return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
2432 //! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
2434 f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
2435 All conversions to i64 are expanded to a libcall.
2437 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
2438 SPUTargetLowering &TLI) {
2439 MVT OpVT = Op.getValueType();
2440 SDValue Op0 = Op.getOperand(0);
2441 MVT Op0VT = Op0.getValueType();
2443 if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
2444 || OpVT == MVT::i64) {
2445 // Convert f32 / f64 to i32 / i64 via libcall.
2447 (Op.getOpcode() == ISD::FP_TO_SINT)
2448 ? RTLIB::getFPTOSINT(Op0VT, OpVT)
2449 : RTLIB::getFPTOUINT(Op0VT, OpVT);
2450     assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-int conversion!");
2452 return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2455 return Op; // return unmolested, legalized op
2458 //! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
2460 i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
2461 All conversions from i64 are expanded to a libcall.
2463 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
2464 SPUTargetLowering &TLI) {
2465 MVT OpVT = Op.getValueType();
2466 SDValue Op0 = Op.getOperand(0);
2467 MVT Op0VT = Op0.getValueType();
2469 if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
2470 || Op0VT == MVT::i64) {
2471 // Convert i32, i64 to f64 via libcall:
2473 (Op.getOpcode() == ISD::SINT_TO_FP)
2474 ? RTLIB::getSINTTOFP(Op0VT, OpVT)
2475 : RTLIB::getUINTTOFP(Op0VT, OpVT);
2476     assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected int-to-fp conversion!");
2478 return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2481 return Op; // return unmolested, legalized
2484 //! Lower ISD::SETCC
2486 This handles MVT::f64 (double floating point) condition lowering
2488 static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
2489 const TargetLowering &TLI) {
2490 CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
2491 DebugLoc dl = Op.getDebugLoc();
2492 assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
2494 SDValue lhs = Op.getOperand(0);
2495 SDValue rhs = Op.getOperand(1);
2496 MVT lhsVT = lhs.getValueType();
2497   assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::f64\n");
2499 MVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
2500 APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2501 MVT IntVT(MVT::i64);
2503 // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
2504 // selected to a NOP:
2505 SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs);
2507 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2508 DAG.getNode(ISD::SRL, dl, IntVT,
2509 i64lhs, DAG.getConstant(32, MVT::i32)));
2510 SDValue lhsHi32abs =
2511 DAG.getNode(ISD::AND, dl, MVT::i32,
2512 lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
2514 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
2516 // SETO and SETUO only use the lhs operand:
2517 if (CC->get() == ISD::SETO) {
2518 // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
2520 APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2521 return DAG.getNode(ISD::XOR, dl, ccResultVT,
2522 DAG.getSetCC(dl, ccResultVT,
2523 lhs, DAG.getConstantFP(0.0, lhsVT),
2525 DAG.getConstant(ccResultAllOnes, ccResultVT));
2526 } else if (CC->get() == ISD::SETUO) {
2527 // Evaluates to true if Op0 is [SQ]NaN
2528 return DAG.getNode(ISD::AND, dl, ccResultVT,
2529 DAG.getSetCC(dl, ccResultVT,
2531 DAG.getConstant(0x7ff00000, MVT::i32),
2533 DAG.getSetCC(dl, ccResultVT,
2535 DAG.getConstant(0, MVT::i32),
2539 SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs);
2541 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2542 DAG.getNode(ISD::SRL, dl, IntVT,
2543 i64rhs, DAG.getConstant(32, MVT::i32)));
2545 // If a value is negative, subtract from the sign magnitude constant:
2546 SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
2548 // Convert the sign-magnitude representation into 2's complement:
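  // IEEE doubles order like sign-magnitude integers. Positive values keep
  // their bit pattern; for negative values, 0x8000000000000000 - pattern
  // yields -magnitude, so the mapped values compare correctly as signed
  // 64-bit integers (e.g. -1.0 = 0xBFF0000000000000 maps to
  // 0xC010000000000000, which sorts below +1.0's 0x3FF0000000000000).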
2549 SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2550 lhsHi32, DAG.getConstant(31, MVT::i32));
2551 SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
2553 DAG.getNode(ISD::SELECT, dl, IntVT,
2554 lhsSelectMask, lhsSignMag2TC, i64lhs);
2556 SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2557 rhsHi32, DAG.getConstant(31, MVT::i32));
2558 SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
2560 DAG.getNode(ISD::SELECT, dl, IntVT,
2561 rhsSelectMask, rhsSignMag2TC, i64rhs);
2565 switch (CC->get()) {
2568 compareOp = ISD::SETEQ; break;
2571 compareOp = ISD::SETGT; break;
2574 compareOp = ISD::SETGE; break;
2577 compareOp = ISD::SETLT; break;
2580 compareOp = ISD::SETLE; break;
2583 compareOp = ISD::SETNE; break;
2585 cerr << "CellSPU ISel Select: unimplemented f64 condition\n";
2591 DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
2592 (ISD::CondCode) compareOp);
2594 if ((CC->get() & 0x8) == 0) {
2595 // Ordered comparison:
2596 SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
2597 lhs, DAG.getConstantFP(0.0, MVT::f64),
2599 SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
2600 rhs, DAG.getConstantFP(0.0, MVT::f64),
2602 SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
2604 result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
2610 //! Lower ISD::SELECT_CC
2612 ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
2615 \note Need to revisit this in the future: if the code path through the true
2616 and false value computations is longer than the latency of a branch (6
2617   cycles), then it would be more advantageous to insert a new basic block and
2618   branch on the condition. However, this code does not make that assumption,
2619   given the simplistic uses so far.
2622 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
2623 const TargetLowering &TLI) {
2624 MVT VT = Op.getValueType();
2625 SDValue lhs = Op.getOperand(0);
2626 SDValue rhs = Op.getOperand(1);
2627 SDValue trueval = Op.getOperand(2);
2628 SDValue falseval = Op.getOperand(3);
2629 SDValue condition = Op.getOperand(4);
2630 DebugLoc dl = Op.getDebugLoc();
2632 // NOTE: SELB's arguments: $rA, $rB, $mask
2634 // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
2635   // where bits in $mask are 1. The setcc mask below has 1s where the condition
2636   // is true and 0s where it is false, so the true value must go in the $rB
2637   // position; hence the arguments to SELB appear reversed (falseval, trueval).
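  // In other words, result = (falseval & ~setcc(lhs, rhs, cc))
  //                        | (trueval  &  setcc(lhs, rhs, cc)).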
2639 // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2640 // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2641 // with another "cannot select select_cc" assert:
2643 SDValue compare = DAG.getNode(ISD::SETCC, dl,
2644 TLI.getSetCCResultType(Op.getValueType()),
2645 lhs, rhs, condition);
2646 return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
2649 //! Custom lower ISD::TRUNCATE
2650 static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
2652 // Type to truncate to
2653 MVT VT = Op.getValueType();
2654 MVT::SimpleValueType simpleVT = VT.getSimpleVT();
2655 MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2656 DebugLoc dl = Op.getDebugLoc();
2658 // Type to truncate from
2659 SDValue Op0 = Op.getOperand(0);
2660 MVT Op0VT = Op0.getValueType();
2662 if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
2663 // Create shuffle mask, least significant doubleword of quadword
2664 unsigned maskHigh = 0x08090a0b;
2665 unsigned maskLow = 0x0c0d0e0f;
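    // Bytes 8..15 of the i128 are its least significant doubleword on the
    // big-endian SPU; the mask copies them into bytes 0..7 (the preferred
    // slot for an i64) and repeats them in bytes 8..15.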
2666 // Use a shuffle to perform the truncation
2667 SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2668 DAG.getConstant(maskHigh, MVT::i32),
2669 DAG.getConstant(maskLow, MVT::i32),
2670 DAG.getConstant(maskHigh, MVT::i32),
2671 DAG.getConstant(maskLow, MVT::i32));
2673 SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2674 Op0, Op0, shufMask);
2676 return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
2679 return SDValue(); // Leave the truncate unmolested
2682 //! Custom (target-specific) lowering entry point
2684 This is where LLVM's DAG selection process calls to do target-specific
2688 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2690 unsigned Opc = (unsigned) Op.getOpcode();
2691 MVT VT = Op.getValueType();
2695 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2696 cerr << "Op.getOpcode() = " << Opc << "\n";
2697 cerr << "*Op.getNode():\n";
2698 Op.getNode()->dump();
2705 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2707 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2708 case ISD::ConstantPool:
2709 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2710 case ISD::GlobalAddress:
2711 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2712 case ISD::JumpTable:
2713 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2714 case ISD::ConstantFP:
2715 return LowerConstantFP(Op, DAG);
2716 case ISD::FORMAL_ARGUMENTS:
2717 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2719 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2721 return LowerRET(Op, DAG, getTargetMachine());
2723 // i8, i64 math ops:
2732 return LowerI8Math(Op, DAG, Opc, *this);
2736 case ISD::FP_TO_SINT:
2737 case ISD::FP_TO_UINT:
2738 return LowerFP_TO_INT(Op, DAG, *this);
2740 case ISD::SINT_TO_FP:
2741 case ISD::UINT_TO_FP:
2742 return LowerINT_TO_FP(Op, DAG, *this);
2744 // Vector-related lowering.
2745 case ISD::BUILD_VECTOR:
2746 return LowerBUILD_VECTOR(Op, DAG);
2747 case ISD::SCALAR_TO_VECTOR:
2748 return LowerSCALAR_TO_VECTOR(Op, DAG);
2749 case ISD::VECTOR_SHUFFLE:
2750 return LowerVECTOR_SHUFFLE(Op, DAG);
2751 case ISD::EXTRACT_VECTOR_ELT:
2752 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2753 case ISD::INSERT_VECTOR_ELT:
2754 return LowerINSERT_VECTOR_ELT(Op, DAG);
2756 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2760 return LowerByteImmed(Op, DAG);
2762 // Vector and i8 multiply:
2765 return LowerI8Math(Op, DAG, Opc, *this);
2768 return LowerCTPOP(Op, DAG);
2770 case ISD::SELECT_CC:
2771 return LowerSELECT_CC(Op, DAG, *this);
2774 return LowerSETCC(Op, DAG, *this);
2777 return LowerTRUNCATE(Op, DAG);
2783 void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
2784 SmallVectorImpl<SDValue>&Results,
2788 unsigned Opc = (unsigned) N->getOpcode();
2789 MVT OpVT = N->getValueType(0);
2793 cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2794 cerr << "Op.getOpcode() = " << Opc << "\n";
2795 cerr << "*Op.getNode():\n";
2803 /* Otherwise, return unchanged */
2806 //===----------------------------------------------------------------------===//
2807 // Target Optimization Hooks
2808 //===----------------------------------------------------------------------===//
2811 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2814 TargetMachine &TM = getTargetMachine();
2816 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2817 SelectionDAG &DAG = DCI.DAG;
2818 SDValue Op0 = N->getOperand(0); // everything has at least one operand
2819 MVT NodeVT = N->getValueType(0); // The node's value type
2820 MVT Op0VT = Op0.getValueType(); // The first operand's result
2821 SDValue Result; // Initially, empty result
2822 DebugLoc dl = N->getDebugLoc();
2824 switch (N->getOpcode()) {
2827 SDValue Op1 = N->getOperand(1);
2829 if (Op0.getOpcode() == SPUISD::IndirectAddr
2830 || Op1.getOpcode() == SPUISD::IndirectAddr) {
2831 // Normalize the operands to reduce repeated code
2832 SDValue IndirectArg = Op0, AddArg = Op1;
2834 if (Op1.getOpcode() == SPUISD::IndirectAddr) {
2839 if (isa<ConstantSDNode>(AddArg)) {
2840       ConstantSDNode *CN0 = cast<ConstantSDNode>(AddArg);
2841 SDValue IndOp1 = IndirectArg.getOperand(1);
2843 if (CN0->isNullValue()) {
2844 // (add (SPUindirect <arg>, <arg>), 0) ->
2845 // (SPUindirect <arg>, <arg>)
2847 #if !defined(NDEBUG)
2848 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2850 << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
2851 << "With: (SPUindirect <arg>, <arg>)\n";
2856 } else if (isa<ConstantSDNode>(IndOp1)) {
2857 // (add (SPUindirect <arg>, <const>), <const>) ->
2858 // (SPUindirect <arg>, <const + const>)
2859         ConstantSDNode *CN1 = cast<ConstantSDNode>(IndOp1);
2860 int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
2861 SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
2863 #if !defined(NDEBUG)
2864 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2866 << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
2867 << "), " << CN0->getSExtValue() << ")\n"
2868 << "With: (SPUindirect <arg>, "
2869 << combinedConst << ")\n";
2873 return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
2874 IndirectArg, combinedValue);
2880 case ISD::SIGN_EXTEND:
2881 case ISD::ZERO_EXTEND:
2882 case ISD::ANY_EXTEND: {
2883 if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
2884 // (any_extend (SPUextract_elt0 <arg>)) ->
2885 // (SPUextract_elt0 <arg>)
2886 // Types must match, however...
2887 #if !defined(NDEBUG)
2888 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2889 cerr << "\nReplace: ";
2892 Op0.getNode()->dump(&DAG);
2901 case SPUISD::IndirectAddr: {
2902 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2903 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
2904 if (CN != 0 && CN->getZExtValue() == 0) {
2905 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2906 // (SPUaform <addr>, 0)
2908 DEBUG(cerr << "Replace: ");
2909 DEBUG(N->dump(&DAG));
2910 DEBUG(cerr << "\nWith: ");
2911 DEBUG(Op0.getNode()->dump(&DAG));
2912 DEBUG(cerr << "\n");
2916 } else if (Op0.getOpcode() == ISD::ADD) {
2917 SDValue Op1 = N->getOperand(1);
2918 if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
2919 // (SPUindirect (add <arg>, <arg>), 0) ->
2920 // (SPUindirect <arg>, <arg>)
2921 if (CN1->isNullValue()) {
2923 #if !defined(NDEBUG)
2924 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2926 << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
2927 << "With: (SPUindirect <arg>, <arg>)\n";
2931 return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
2932 Op0.getOperand(0), Op0.getOperand(1));
2938 case SPUISD::SHLQUAD_L_BITS:
2939 case SPUISD::SHLQUAD_L_BYTES:
2940 case SPUISD::VEC_SHL:
2941 case SPUISD::VEC_SRL:
2942 case SPUISD::VEC_SRA:
2943 case SPUISD::ROTBYTES_LEFT: {
2944 SDValue Op1 = N->getOperand(1);
2946 // Kill degenerate vector shifts:
2947 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
2948 if (CN->isNullValue()) {
2954 case SPUISD::PREFSLOT2VEC: {
2955 switch (Op0.getOpcode()) {
2958 case ISD::ANY_EXTEND:
2959 case ISD::ZERO_EXTEND:
2960 case ISD::SIGN_EXTEND: {
2961 // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
2963 // but only if the SPUprefslot2vec and <arg> types match.
2964 SDValue Op00 = Op0.getOperand(0);
2965 if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
2966 SDValue Op000 = Op00.getOperand(0);
2967 if (Op000.getValueType() == NodeVT) {
2973 case SPUISD::VEC2PREFSLOT: {
2974 // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
2976 Result = Op0.getOperand(0);
2984 // Otherwise, return unchanged.
2986 if (Result.getNode()) {
2987 DEBUG(cerr << "\nReplace.SPU: ");
2988 DEBUG(N->dump(&DAG));
2989 DEBUG(cerr << "\nWith: ");
2990 DEBUG(Result.getNode()->dump(&DAG));
2991 DEBUG(cerr << "\n");
2998 //===----------------------------------------------------------------------===//
2999 // Inline Assembly Support
3000 //===----------------------------------------------------------------------===//
3002 /// getConstraintType - Given a constraint letter, return the type of
3003 /// constraint it is for this target.
3004 SPUTargetLowering::ConstraintType
3005 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
3006 if (ConstraintLetter.size() == 1) {
3007 switch (ConstraintLetter[0]) {
3014 return C_RegisterClass;
3017 return TargetLowering::getConstraintType(ConstraintLetter);
3020 std::pair<unsigned, const TargetRegisterClass*>
3021 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
3024 if (Constraint.size() == 1) {
3025 // GCC RS6000 Constraint Letters
3026 switch (Constraint[0]) {
3030 return std::make_pair(0U, SPU::R64CRegisterClass);
3031 return std::make_pair(0U, SPU::R32CRegisterClass);
3034 return std::make_pair(0U, SPU::R32FPRegisterClass);
3035 else if (VT == MVT::f64)
3036 return std::make_pair(0U, SPU::R64FPRegisterClass);
3039 return std::make_pair(0U, SPU::GPRCRegisterClass);
3043 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
3046 //! Compute used/known bits for a SPU operand
3048 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
3052 const SelectionDAG &DAG,
3053 unsigned Depth ) const {
3055 const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
3057 switch (Op.getOpcode()) {
3059 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
3065 case SPUISD::PREFSLOT2VEC:
3066 case SPUISD::LDRESULT:
3067 case SPUISD::VEC2PREFSLOT:
3068 case SPUISD::SHLQUAD_L_BITS:
3069 case SPUISD::SHLQUAD_L_BYTES:
3070 case SPUISD::VEC_SHL:
3071 case SPUISD::VEC_SRL:
3072 case SPUISD::VEC_SRA:
3073 case SPUISD::VEC_ROTL:
3074 case SPUISD::VEC_ROTR:
3075 case SPUISD::ROTBYTES_LEFT:
3076 case SPUISD::SELECT_MASK:
3083 SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3084 unsigned Depth) const {
3085 switch (Op.getOpcode()) {
3090 MVT VT = Op.getValueType();
3092 if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
3095 return VT.getSizeInBits();
3100 // LowerAsmOperandForConstraint
3102 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3103 char ConstraintLetter,
3105 std::vector<SDValue> &Ops,
3106 SelectionDAG &DAG) const {
3107 // Default, for the time being, to the base class handler
3108 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
3112 /// isLegalAddressImmediate - Return true if the integer value can be used
3113 /// as the offset of the target addressing mode.
3114 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3115 const Type *Ty) const {
3116   // The SPU local store is 256K, so legal address immediates fit in 18 bits:
3117 return (V > -(1 << 18) && V < (1 << 18) - 1);
3120 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3125 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3126 // The SPU target isn't yet aware of offsets.