//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation -------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//
#include "SPURegisterNames.h"
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "SPUFrameInfo.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
#include <map>

using namespace llvm;

// Used in getTargetNodeName() below
namespace {
  std::map<unsigned, const char *> node_names;

  //! MVT mapping to useful data for Cell SPU
  struct valtype_map_s {
    const MVT valtype;
    const int prefslot_byte;
  };

  const valtype_map_s valtype_map[] = {
    { MVT::i1,   3 },
    { MVT::i8,   3 },
    { MVT::i16,  2 },
    { MVT::i32,  0 },
    { MVT::f32,  0 },
    { MVT::i64,  0 },
    { MVT::f64,  0 },
    { MVT::i128, 0 }
  };

  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
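  // Note on "preferred slots": SPU registers are 128 bits wide, and scalar
  // operations read their operands from a fixed byte position within the
  // quadword (a word's preferred slot is bytes 0-3, a halfword's bytes 2-3,
  // a byte's byte 3). prefslot_byte records that byte offset so the load
  // lowering below can rotate the requested element into place; e.g. an i16
  // at quadword offset 0x6 is rotated left by (0x6 - 2) = 4 bytes.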
  const valtype_map_s *getValueTypeMapEntry(MVT VT) {
    const valtype_map_s *retval = 0;

    for (size_t i = 0; i < n_valtype_map; ++i) {
      if (valtype_map[i].valtype == VT) {
        retval = valtype_map + i;
        break;
      }
    }

#ifndef NDEBUG
    if (retval == 0) {
      cerr << "getValueTypeMapEntry returns NULL for "
           << VT.getMVTString() << "\n";
      abort();
    }
#endif

    return retval;
  }
}
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM),
    SPUTM(TM)
{
  // Fold away setcc operations if possible.

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set up the SPU's register classes:
  addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);

  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::Constant,   MVT::i64, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::LOAD,   VT, Custom);
    setOperationAction(ISD::STORE,  VT, Custom);
    setLoadExtAction(ISD::EXTLOAD,  VT, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, Custom);

    // SMUL_LOHI, UMUL_LOHI are not legal for Cell:
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
      MVT StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }
  for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType) sctype;

    setOperationAction(ISD::LOAD,  VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
      MVT StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }
  // Custom lower BRCOND for i8 to "promote" the result to whatever the result
  // operand happens to be:
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);

  // Custom lower SELECT_CC for most cases, but expand by default
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i8,    Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i16,   Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32,   Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i64,   Custom);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);

  // SPU has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);

  // If we're enabling GP optimizations, use hardware square root
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.

  // FIXME: Change from "expand" to appropriate type once ROTR is supported in
  // the .td files.
  setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i8,  Expand /*Custom*/);

  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8,  Custom);

  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);

  // Make these operations legal and handle them during instruction selection:
  setOperationAction(ISD::SHL, MVT::i64, Legal);
  setOperationAction(ISD::SRL, MVT::i64, Legal);
  setOperationAction(ISD::SRA, MVT::i64, Legal);

  // Custom lower i8, i32 and i64 multiplications
  setOperationAction(ISD::MUL, MVT::i8,  Custom);
  setOperationAction(ISD::MUL, MVT::i32, Legal);
  setOperationAction(ISD::MUL, MVT::i64, Expand);   // libcall

  // Need to custom handle (some) common i8, i64 math ops
  setOperationAction(ISD::ADD, MVT::i8,  Custom);
  setOperationAction(ISD::ADD, MVT::i64, Custom);
  setOperationAction(ISD::SUB, MVT::i8,  Custom);
  setOperationAction(ISD::SUB, MVT::i64, Custom);

  // SPU does not have BSWAP, but it does support CTLZ on i32.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8,  Custom);
  setOperationAction(ISD::CTPOP, MVT::i16, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i64, Custom);
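  // (Implementation note: the CTPOP custom lowering can be built on the SPU
  // cntb instruction, wrapped as SPUISD::CNTB below, which counts the set
  // bits in each byte of a quadword; the per-byte counts are then summed to
  // the requested width.)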
  setOperationAction(ISD::CTTZ, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i64, Expand);

  setOperationAction(ISD::CTLZ, MVT::i32, Legal);

  // SPU has a version of select that implements (a&~c)|(b&c), just like
  // select ought to work:
  setOperationAction(ISD::SELECT, MVT::i8,  Legal);
  setOperationAction(ISD::SELECT, MVT::i16, Legal);
  setOperationAction(ISD::SELECT, MVT::i32, Legal);
  setOperationAction(ISD::SELECT, MVT::i64, Legal);

  setOperationAction(ISD::SETCC, MVT::i8,  Legal);
  setOperationAction(ISD::SETCC, MVT::i16, Legal);
  setOperationAction(ISD::SETCC, MVT::i32, Legal);
  setOperationAction(ISD::SETCC, MVT::i64, Legal);
  // Zero extension and sign extension for i64 have to be
  // custom legalized
  setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
  setOperationAction(ISD::ANY_EXTEND,  MVT::i64, Custom);

  // Custom lower i128 -> i64 truncates
  setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);

  // SPU has a legal FP -> signed INT instruction
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f64, Expand);   // libcall

  // SPU has [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC,     MVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::ConstantPool,  VT, Custom);
    setOperationAction(ISD::JumpTable,     VT, Custom);
  }

  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG,  MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND,  MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE,    MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT VT = (MVT::SimpleValueType)i;

    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD, VT, Legal);
    setOperationAction(ISD::SUB, VT, Legal);
    // mul has to be custom lowered.
    // TODO: v2i64 vector multiply
    setOperationAction(ISD::MUL, VT, Legal);

    setOperationAction(ISD::AND,    VT, Legal);
    setOperationAction(ISD::OR,     VT, Legal);
    setOperationAction(ISD::XOR,    VT, Legal);
    setOperationAction(ISD::LOAD,   VT, Legal);
    setOperationAction(ISD::SELECT, VT, Legal);
    setOperationAction(ISD::STORE,  VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
  }
  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR,  MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setOperationAction(ISD::FDIV, MVT::v4f32, Legal);

  setShiftAmountType(MVT::i32);
  setBooleanContents(ZeroOrNegativeOneBooleanContent);
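  // Boolean results are all-ones/all-zero because the SPU compare
  // instructions (ceq, cgt, etc.) produce a full mask of 1s for "true" in
  // each element rather than a single bit; this is also what makes the
  // (a&~c)|(b&c) selb-style SELECT above legal.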
  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  computeRegisterProperties();

  // Set pre-RA register scheduler default to BURR, which produces slightly
  // better code than the default (could also be TDRR, but TargetLowering.h
  // needs a mod to support that model):
  setSchedulingPreference(SchedulingForRegPressure);
}
const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
    node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
    node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
    node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
    node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
    node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
}
//===----------------------------------------------------------------------===//
// Return the Cell SPU's SETCC result type
//===----------------------------------------------------------------------===//

MVT SPUTargetLowering::getSetCCResultType(MVT VT) const {
  // i8, i16 and i32 are valid SETCC result types
  return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ? VT : MVT::i32);
}
//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
//  LowerOperation implementation
//===----------------------------------------------------------------------===//
/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to rotate to extract the requested element.

 For extending loads, we also want to ensure that the following sequence is
 emitted, e.g. for MVT::f32 extending load to MVT::f64:

\verbatim
%1  v16i8,ch = load
%2  v16i8,ch = rotate %1
%3  v4f32, ch = bitconvert %2
%4  f32 = vec2prefslot %3
%5  f64 = fp_extend %4
\endverbatim
*/
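// Worked example (illustrative, not tied to a specific test case): an i32
// load from quadword offset 0x8 has prefslot_byte 0, so the aligned-load
// path below computes rotamt = (0x8 & 0xf) - 0 = 8 and rotates the quadword
// left by 8 bytes to bring bytes 8-11 into the preferred slot (bytes 0-3).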
static SDValue
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDValue the_chain = LN->getChain();
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  MVT InVT = LN->getMemoryVT();
  MVT OutVT = Op.getValueType();
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDValue result;
    SDValue basePtr = LN->getBasePtr();
    SDValue rotate;

    if (alignment == 16) {
      ConstantSDNode *CN;

      // Special cases for a known aligned load to simplify the base pointer
      // and the rotation amount:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();
        int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);

        if (rotamt < 0)
          rotamt += 16;

        rotate = DAG.getConstant(rotamt, MVT::i16);

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        if ((offset & ~0xf) > 0) {
          basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
                                basePtr,
                                DAG.getConstant((offset & ~0xf), PtrVT));
        }
      } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
                 || (basePtr.getOpcode() == SPUISD::IndirectAddr
                     && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
                     && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
        // Plain aligned a-form address: rotate into preferred slot
        // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
        int64_t rotamt = -vtm->prefslot_byte;

        if (rotamt < 0)
          rotamt += 16;

        rotate = DAG.getConstant(rotamt, MVT::i16);
      } else {
        // Offset the rotate amount by the basePtr and the preferred slot
        // byte offset
        int64_t rotamt = -vtm->prefslot_byte;

        if (rotamt < 0)
          rotamt += 16;

        rotate = DAG.getNode(ISD::ADD, PtrVT,
                             basePtr,
                             DAG.getConstant(rotamt, PtrVT));
      }
    } else {
      // Unaligned load: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        SDValue Flag;

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
          the_chain = DAG.getCopyToReg(the_chain, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, VReg, PtrVT);
        } else {
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
        }
      } else {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }

      // Offset the rotate amount by the basePtr and the preferred slot
      // byte offset
      rotate = DAG.getNode(ISD::ADD, PtrVT,
                           basePtr,
                           DAG.getConstant(-vtm->prefslot_byte, PtrVT));
    }
    // Re-emit as a v16i8 vector load
    result = DAG.getLoad(MVT::v16i8, the_chain, basePtr,
                         LN->getSrcValue(), LN->getSrcValueOffset(),
                         LN->isVolatile(), 16);

    // Update the chain
    the_chain = result.getValue(1);

    // Rotate into the preferred slot:
    result = DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v16i8,
                         result.getValue(0), rotate);

    // Convert the loaded v16i8 vector to the appropriate vector type
    // specified by the operand:
    MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
    result = DAG.getNode(SPUISD::VEC2PREFSLOT, InVT,
                         DAG.getNode(ISD::BIT_CONVERT, vecVT, result));

    // Handle extending loads by extending the scalar result:
    if (ExtType == ISD::SEXTLOAD) {
      result = DAG.getNode(ISD::SIGN_EXTEND, OutVT, result);
    } else if (ExtType == ISD::ZEXTLOAD) {
      result = DAG.getNode(ISD::ZERO_EXTEND, OutVT, result);
    } else if (ExtType == ISD::EXTLOAD) {
      unsigned NewOpc = ISD::ANY_EXTEND;

      if (OutVT.isFloatingPoint())
        NewOpc = ISD::FP_EXTEND;

      result = DAG.getNode(NewOpc, OutVT, result);
    }

    SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
    SDValue retops[2] = {
      result,
      the_chain
    };

    result = DAG.getNode(SPUISD::LDRESULT, retvts,
                         retops, sizeof(retops) / sizeof(retops[0]));
    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) LN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}
/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
 */
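// Sketch of the resulting DAG for a scalar store (illustrative): the lowering
// below loads the containing quadword, builds a SHUFFLE_MASK (selected as a
// cbd/chd/cwd-style "generate insertion mask" instruction), SHUFBs the new
// value into the loaded quadword, and stores the merged 16 bytes back.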
static SDValue
LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDValue Value = SN->getValue();
  MVT VT = Value.getValueType();
  MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  unsigned alignment = SN->getAlignment();
  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    // The vector type we really want to load from the 16-byte chunk.
    MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
        stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));

    SDValue alignLoadVec;
    SDValue basePtr = SN->getBasePtr();
    SDValue the_chain = SN->getChain();
    SDValue insertEltOffs;

    if (alignment == 16) {
      ConstantSDNode *CN;

      // Special cases for a known aligned load to simplify the base pointer
      // and insertion byte:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
                                    basePtr,
                                    DAG.getConstant((offset & 0xf), PtrVT));

        if ((offset & ~0xf) > 0) {
          basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
                                basePtr,
                                DAG.getConstant((offset & ~0xf), PtrVT));
        }
      } else {
        // Otherwise, assume it's at byte 0 of basePtr
        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
                                    basePtr,
                                    DAG.getConstant(0, PtrVT));
      }
    } else {
      // Unaligned load: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        SDValue Flag;

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
          the_chain = DAG.getCopyToReg(the_chain, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, VReg, PtrVT);
        } else {
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
        }
      } else {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }

      // Insertion point is solely determined by basePtr's contents
      insertEltOffs = DAG.getNode(ISD::ADD, PtrVT,
                                  basePtr,
                                  DAG.getConstant(0, PtrVT));
    }
    // Re-emit as a v16i8 vector load
    alignLoadVec = DAG.getLoad(MVT::v16i8, the_chain, basePtr,
                               SN->getSrcValue(), SN->getSrcValueOffset(),
                               SN->isVolatile(), 16);

    // Update the chain
    the_chain = alignLoadVec.getValue(1);

    LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
    SDValue theValue = SN->getValue();
    SDValue result;

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }

    // If the base pointer is already a D-form address, then just create
    // a new D-form address with a slot offset and the original base pointer.
    // Otherwise generate a D-form address with the slot offset relative
    // to the stack pointer, which is always aligned.
#if !defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      cerr << "CellSPU LowerSTORE: basePtr = ";
      basePtr.getNode()->dump(&DAG);
      cerr << "\n";
    }
#endif

    SDValue insertEltOp =
      DAG.getNode(SPUISD::SHUFFLE_MASK, vecVT, insertEltOffs);
    SDValue vectorizeOp =
      DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue);

    result = DAG.getNode(SPUISD::SHUFB, vecVT,
                         vectorizeOp, alignLoadVec,
                         DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, insertEltOp));

    result = DAG.getStore(the_chain, result, basePtr,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->getAlignment());
#if 0 && !defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      const SDValue &currentRoot = DAG.getRoot();

      DAG.setRoot(result);
      cerr << "------- CellSPU:LowerStore result:\n";
      DAG.dump();
      cerr << "-------\n";
      DAG.setRoot(currentRoot);
    }
#endif

    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) SN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}
/// Generate the address of a constant pool entry.
static SDValue
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDValue with the constant pool address in it.
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
    }
  }

  assert(0 &&
         "LowerConstantPool: Relocation model other than static"
         " not supported.");
  return SDValue();
}
static SDValue
LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
    }
  }

  assert(0 &&
         "LowerJumpTable: Relocation model other than static not supported.");
  return SDValue();
}
static SDValue
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  const TargetMachine &TM = DAG.getTarget();
  SDValue Zero = DAG.getConstant(0, PtrVT);

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
    }
  } else {
    cerr << "LowerGlobalAddress: Relocation model other than static not "
         << "supported.\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}
//! Custom lower i64 integer constants
/*!
 This code inserts all of the necessary juggling that needs to occur to load
 a 64-bit constant into a register.
 */
static SDValue
LowerConstant(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();

  if (VT == MVT::i64) {
    ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
    SDValue T = DAG.getConstant(CN->getZExtValue(), VT);
    return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
  } else {
    cerr << "LowerConstant: unhandled constant type "
         << VT.getMVTString()
         << "\n";
    abort();
    /*NOTREACHED*/
  }
}
//! Custom lower double precision floating point constants
static SDValue
LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();

  if (VT == MVT::f64) {
    ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());

    assert((FP != 0) &&
           "LowerConstantFP: Node is not ConstantFPSDNode");

    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
    SDValue T = DAG.getConstant(dbits, MVT::i64);
    SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T);
    return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
                       DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, Tvec));
  }

  return SDValue();
}
static SDValue
LowerBRCOND(SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) {
  SDValue Cond = Op.getOperand(1);
  MVT CondVT = Cond.getValueType();
  unsigned CondOpc;

  if (CondVT == MVT::i8) {
    SDValue CondOp0 = Cond.getOperand(0);
    if (Cond.getOpcode() == ISD::TRUNCATE) {
      // Use the truncate's value type and ANY_EXTEND the condition (DAGcombine
      // will then remove the truncate)
      CondVT = CondOp0.getValueType();
      CondOpc = ISD::ANY_EXTEND;
    } else {
      CondVT = MVT::i32; // default to something reasonable
      CondOpc = ISD::ZERO_EXTEND;
    }

    Cond = DAG.getNode(CondOpc, CondVT, Op.getOperand(1));

    return DAG.getNode(ISD::BRCOND, Op.getValueType(),
                       Op.getOperand(0), Cond, Op.getOperand(2));
  }

  return SDValue(); // Unchanged
}
static SDValue
LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
{
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  SmallVector<SDValue, 48> ArgValues;
  SDValue Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;

  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Add DAG nodes to load the arguments or copy them out of registers.
  for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
       ArgNo != e; ++ArgNo) {
    MVT ObjectVT = Op.getValue(ArgNo).getValueType();
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    SDValue ArgVal;

    if (ArgRegIdx < NumArgRegs) {
      const TargetRegisterClass *ArgRegClass;

      switch (ObjectVT.getSimpleVT()) {
      default: {
        cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
             << ObjectVT.getMVTString()
             << "\n";
        abort();
      }
      case MVT::i8:
        ArgRegClass = &SPU::R8CRegClass;
        break;
      case MVT::i16:
        ArgRegClass = &SPU::R16CRegClass;
        break;
      case MVT::i32:
        ArgRegClass = &SPU::R32CRegClass;
        break;
      case MVT::i64:
        ArgRegClass = &SPU::R64CRegClass;
        break;
      case MVT::f32:
        ArgRegClass = &SPU::R32FPRegClass;
        break;
      case MVT::f64:
        ArgRegClass = &SPU::R64FPRegClass;
        break;
      case MVT::v2f64:
      case MVT::v4f32:
      case MVT::v2i64:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        ArgRegClass = &SPU::VECREGRegClass;
        break;
      }

      unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
      RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
      ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
      ++ArgRegIdx;
    } else {
      // We need to load the argument to a virtual register if we determined
      // above that we ran out of physical registers of the appropriate type
      // or we're forced to do vararg
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
      ArgOffset += StackSlotSize;
    }

    ArgValues.push_back(ArgVal);
    // Update the chain
    Root = ArgVal.getOperand(0);
  }
  // vararg handling:
  if (isVarArg) {
    // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
    // We will spill (79-3)+1 registers to the stack
    SmallVector<SDValue, 79-3+1> MemOps;

    // Create the frame slot
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
      SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
      SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
      SDValue Store = DAG.getStore(Root, ArgVal, FIN, NULL, 0);
      Root = Store.getOperand(0);
      MemOps.push_back(Store);

      // Increment address by stack slot size for the next stored argument
      ArgOffset += StackSlotSize;
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
                         &MemOps[0], MemOps.size());
  }

  ArgValues.push_back(Root);

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, Op.getNode()->getVTList(),
                     &ArgValues[0], ArgValues.size());
}
/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||        // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;  // Top 14 bits have to be sext of immediate.

  return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
}
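// Example (illustrative): Addr = 0x1000 passes both checks (word aligned and
// within the 18-bit sign-extended range), so LowerCALL below can use the word
// offset 0x400 directly as a branch-target immediate; Addr = 0x1002 fails the
// low-bits check and falls through to the regular callee lowering.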
static SDValue
LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
  SDValue Chain = TheCall->getChain();
  SDValue Callee = TheCall->getCallee();
  unsigned NumOps = TheCall->getNumArgs();
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  // Handy pointer type
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area. According to the SPU ABI,
  // we minimally need space for [LR] and [SP]
  unsigned NumStackBytes = SPUFrameInfo::minStackSize();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDValue, 8> MemOpChains;
  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = TheCall->getArg(i);

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType().getSimpleVT()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::v2i64:
    case MVT::v2f64:
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }
  // Update number of stack bytes actually used, insert a call sequence start
  NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
                                                            true));

  if (!MemOpChains.empty()) {
    // Adjust the stack pointer for the stack arguments.
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;
  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    GlobalValue *GV = G->getGlobal();
    MVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);

    if (!ST->usingLargeMem()) {
      // Turn calls to targets that are defined (i.e., have bodies) into BRSL
      // style calls, otherwise, external symbols are BRASL calls. This assumes
      // that declared/defined symbols are in the same compilation unit and can
      // be reached through PC-relative jumps.
      //
      // NOTE:
      // This may be an unsafe assumption for JIT and really large compilation
      // units.
      if (GV->isDeclaration()) {
        Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
      } else {
        Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
      }
    } else {
      // "Large memory" mode: Turn all calls into indirect calls with a X-form
      // address.
      Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    MVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
                                                 Callee.getValueType());

    if (!ST->usingLargeMem()) {
      Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, ExtSym, Zero);
    } else {
      Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, ExtSym, Zero);
    }
  } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDValue(Dest, 0);
  }
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);
  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag),
                      &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  if (TheCall->getValueType(0) != MVT::Other)
    InFlag = Chain.getValue(1);
  SDValue ResultVals[3];
  unsigned NumResults = 0;

  // If the call has results, copy the values out of the ret val registers.
  switch (TheCall->getValueType(0).getSimpleVT()) {
  default: assert(0 && "Unexpected ret value!");
  case MVT::Other: break;
  case MVT::i32:
    if (TheCall->getValueType(1) == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals[1] = Chain.getValue(0);
      NumResults = 2;
    } else {
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      NumResults = 1;
    }
    break;
  case MVT::i64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  case MVT::f32:
  case MVT::f64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  case MVT::v2f64:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  }

  // If the function returns void, just return the chain.
  if (NumResults == 0)
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  ResultVals[NumResults++] = Chain;
  SDValue Res = DAG.getMergeValues(ResultVals, NumResults);
  return Res.getValue(Op.getResNo());
}
static SDValue
LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, TM, RVLocs);
  CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }

  SDValue Chain = Op.getOperand(0);
  SDValue Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
    Flag = Chain.getValue(1);
  }

  if (Flag.getNode())
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
}
//===----------------------------------------------------------------------===//
// Vector related lowering:
//===----------------------------------------------------------------------===//

static ConstantSDNode *
getVecImm(SDNode *N) {
  SDValue OpVal(0, 0);

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.getNode() == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return 0;
  }

  if (OpVal.getNode() != 0) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      return CN;
    }
  }

  return 0; // All UNDEF: use implicit def.; not Constant node
}
/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);

      if (upper != lower)
        return SDValue();

      Value = Value >> 32;
    }
    if (Value <= 0x3ffff)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}
/// get_vec_i16imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);

      if (upper != lower)
        return SDValue();

      Value = Value >> 32;
    }
    if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
      return DAG.getTargetConstant(Value, ValueType);
    }
  }

  return SDValue();
}
/// get_vec_i10imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 10-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);

      if (upper != lower)
        return SDValue();

      Value = Value >> 32;
    }
    if (isS10Constant(Value))
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}
/// get_vec_i8imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 8-bit constant, and if so, return the
/// constant.
///
/// @note: The incoming vector is v16i8 because that's the only way we can load
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same value.
SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
                           MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getZExtValue();
    if (ValueType == MVT::i16
        && Value <= 0xffff                 /* truncated from uint64_t */
        && ((short) Value >> 8) == ((short) Value & 0xff))
      return DAG.getTargetConstant(Value & 0xff, ValueType);
    else if (ValueType == MVT::i8
             && (Value & 0xff) == Value)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}
/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant
SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
                             MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if ((ValueType == MVT::i32
         && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
      return DAG.getTargetConstant(Value >> 16, ValueType);
  }

  return SDValue();
}
/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
  }

  return SDValue();
}

/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
  }

  return SDValue();
}
// If this is a vector of constants or undefs, get the bits. A bit in
// UndefBits is set if the corresponding element of the vector is an
// ISD::UNDEF value. For undefs, the corresponding VectorBits values are
// zero. Return true if this is not an array of constants, false if it is.
//
static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
                                       uint64_t UndefBits[2]) {
  // Start with zero'd results.
  VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;

  unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
    SDValue OpVal = BV->getOperand(i);

    unsigned PartNo = i >= e/2;               // In the upper 128 bits?
    unsigned SlotNo = e/2 - (i & (e/2-1))-1;  // Which subpiece of the uint64_t.

    uint64_t EltBits = 0;
    if (OpVal.getOpcode() == ISD::UNDEF) {
      uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
      UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
      continue;
    } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
    } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
      const APFloat &apf = CN->getValueAPF();
      EltBits = (CN->getValueType(0) == MVT::f32
                 ? FloatToBits(apf.convertToFloat())
                 : DoubleToBits(apf.convertToDouble()));
    } else {
      // Nonconstant element.
      return true;
    }

    VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
  }

  //printf("%llx %llx %llx %llx\n",
  //  VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
  return false;
}
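// Worked example (illustrative): for a v4i32 build_vector <1, 1, undef, 1>,
// this produces VectorBits = { 0x0000000100000001, 0x0000000000000001 } and
// UndefBits = { 0, 0xffffffff00000000 }, and returns false (it is a constant
// vector); isConstantSplat below can still match the splat of 1 because the
// undef element is masked out of its comparisons.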
/// If this is a splat (repetition) of a value across the whole vector, return
/// the smallest size that splats it. For example, "0x01010101010101..." is a
/// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
/// SplatSize = 1 byte.
static bool isConstantSplat(const uint64_t Bits128[2],
                            const uint64_t Undef128[2],
                            unsigned MinSplatBits,
                            uint64_t &SplatBits, uint64_t &SplatUndef,
                            unsigned &SplatSize) {
  // Don't let undefs prevent splats from matching. See if the top 64-bits are
  // the same as the lower 64-bits, ignoring undefs.
  uint64_t Bits64  = Bits128[0] | Bits128[1];
  uint64_t Undef64 = Undef128[0] & Undef128[1];
  uint32_t Bits32  = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
  uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
  uint16_t Bits16  = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
  uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);

  if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
    if (MinSplatBits < 64) {
      // Check that the top 32-bits are the same as the lower 32-bits, ignoring
      // undefs.
      if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
        if (MinSplatBits < 32) {
          // If the top 16-bits are the same as the lower 16-bits, ignoring
          // undefs, we have a splat narrower than i32.
          if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
            if (MinSplatBits < 16) {
              // If the top 8-bits are the same as the lower 8-bits, ignoring
              // undefs, we have an i8 splat.
              if ((Bits16 & (uint16_t(~Undef16) >> 8))
                  == ((Bits16 >> 8) & ~Undef16)) {
                // We have an 8-bit splat.
                SplatBits  = uint8_t(Bits16)  | uint8_t(Bits16 >> 8);
                SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
                SplatSize = 1;
                return true;
              }
            } else {
              SplatBits = Bits16;
              SplatUndef = Undef16;
              SplatSize = 2;
              return true;
            }
          }
        } else {
          SplatBits = Bits32;
          SplatUndef = Undef32;
          SplatSize = 4;
          return true;
        }
      }
    } else {
      SplatBits = Bits128[0];
      SplatUndef = Undef128[0];
      SplatSize = 8;
      return true;
    }
  }

  return false;  // Can't be a splat if two pieces don't match.
}
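// Worked example (illustrative): Bits128 = { 0x0101010101010101,
// 0x0101010101010101 } with no undefs folds to Bits16 = 0x0101, so with
// MinSplatBits = 8 this reports SplatBits = 0x01, SplatSize = 1.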
// If this is a case we can't handle, return null and let the default
// expansion code take care of it. If we CAN select this case, and if it
// selects to a single instruction, return Op. Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  // If this is a vector of constants or undefs, get the bits. A bit in
  // UndefBits is set if the corresponding element of the vector is an
  // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
  // zero.
  uint64_t VectorBits[2];
  uint64_t UndefBits[2];
  uint64_t SplatBits, SplatUndef;
  unsigned SplatSize;
  if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
      || !isConstantSplat(VectorBits, UndefBits,
                          VT.getVectorElementType().getSizeInBits(),
                          SplatBits, SplatUndef, SplatSize))
    return SDValue(); // Not a constant vector, not a splat.

  switch (VT.getSimpleVT()) {
  default:
  case MVT::v4f32: {
    uint32_t Value32 = SplatBits;
    assert(SplatSize == 4
           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(Value32, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
    break;
  }
  case MVT::v2f64: {
    uint64_t f64val = SplatBits;
    assert(SplatSize == 8
           && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(f64val, MVT::i64);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
    break;
  }
  case MVT::v16i8: {
    // 8-bit constants have to be expanded to 16-bits
    unsigned short Value16 = SplatBits | (SplatBits << 8);
    SDValue Ops[8];
    for (int i = 0; i < 8; ++i)
      Ops[i] = DAG.getConstant(Value16, MVT::i16);
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
  }
  case MVT::v8i16: {
    unsigned short Value16;
    if (VT.getVectorElementType() == MVT::i16)
      Value16 = (unsigned short) (SplatBits & 0xffff);
    else
      Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
    SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
    SDValue Ops[8];
    for (int i = 0; i < 8; ++i) Ops[i] = T;
    return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
  }
  case MVT::v4i32: {
    unsigned int Value = SplatBits;
    SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
    return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
  }
  case MVT::v2i64: {
    uint64_t val = SplatBits;
    uint32_t upper = uint32_t(val >> 32);
    uint32_t lower = uint32_t(val);

    if (upper == lower) {
      // Magic constant that can be matched by IL, ILA, et. al.
      SDValue Val = DAG.getTargetConstant(val, MVT::i64);
      return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
    } else {
      SDValue LO32;
      SDValue HI32;
      SmallVector<SDValue, 16> ShufBytes;

      bool upper_special, lower_special;

      // NOTE: This code creates common-case shuffle masks that can be easily
      // detected as common expressions. It is not attempting to create highly
      // specialized masks to replace any and all 0's, 0xff's and 0x80's.
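      // (Background, for readers unfamiliar with shufb: each byte of the
      // control word selects a source byte, and the special control values
      // 0x80, 0xC0 and 0xE0 produce the constant bytes 0x00, 0xFF and 0x80
      // respectively, which is what the val computations below rely on.)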
      // Detect if the upper or lower half is a special shuffle mask pattern:
      upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
      lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);

      // Create lower vector if not a special pattern
      if (!lower_special) {
        SDValue LO32C = DAG.getConstant(lower, MVT::i32);
        LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
                           DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                       LO32C, LO32C, LO32C, LO32C));
      }

      // Create upper vector if not a special pattern
      if (!upper_special) {
        SDValue HI32C = DAG.getConstant(upper, MVT::i32);
        HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
                           DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                       HI32C, HI32C, HI32C, HI32C));
      }

      // If either upper or lower are special, then the two input operands are
      // the same (basically, one of them is a "don't care")
      if (lower_special)
        LO32 = HI32;
      if (upper_special)
        HI32 = LO32;

      if (lower_special && upper_special) {
        // Unhappy situation... both upper and lower are special, so punt with
        // a target constant:
        SDValue Zero = DAG.getConstant(0, MVT::i32);
        HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
                                  Zero, Zero);
      }

      for (int i = 0; i < 4; ++i) {
        uint64_t val = 0;
        for (int j = 0; j < 4; ++j) {
          bool process_upper, process_lower;
          val <<= 8;
          process_upper = (upper_special && (i & 1) == 0);
          process_lower = (lower_special && (i & 1) == 1);

          if (process_upper || process_lower) {
            if ((process_upper && upper == 0)
                || (process_lower && lower == 0))
              val |= 0x80;
            else if ((process_upper && upper == 0xffffffff)
                     || (process_lower && lower == 0xffffffff))
              val |= 0xc0;
            else if ((process_upper && upper == 0x80000000)
                     || (process_lower && lower == 0x80000000))
              val |= (j == 0 ? 0xe0 : 0x80);
          } else
            val |= i * 4 + j + ((i & 1) * 16);
        }

        ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
      }

      return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
                         DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                     &ShufBytes[0], ShufBytes.size()));
    }
  }
  }

  return SDValue();
}
1732 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1733 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1734 /// permutation vector, V3, is monotonically increasing with one "exception"
1735 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1736 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1737 /// In either case, the net result is going to eventually invoke SHUFB to
1738 /// permute/shuffle the bytes from V1 and V2.
1740 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, which
1741 /// generate the control word for byte/halfword/word insertion. This takes care
1742 /// of a single element move from V2 into V1.
1744 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
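/// Example: for v4i32, the permutation mask (0, 1, 6, 3) is monotonic with a
/// single element (mask index 6, i.e. V2 element 2) taken from V2, so it maps
/// to a C*D-generated insertion mask; a mask like (0, 3, 1, 2) falls back to
/// a general SHUFB byte mask instead.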
1745 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1746 SDValue V1 = Op.getOperand(0);
1747 SDValue V2 = Op.getOperand(1);
1748 SDValue PermMask = Op.getOperand(2);
1750 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1752 // If we have a single element being moved from V1 to V2, this can be handled
1753 // using the C*[DX] compute mask instructions, but the vector elements have
1754 // to be monotonically increasing with one exception element.
1755 MVT VecVT = V1.getValueType();
1756 MVT EltVT = VecVT.getVectorElementType();
1757 unsigned EltsFromV2 = 0;
1758 unsigned V2Elt = 0;
1759 unsigned V2EltIdx0 = 0;
1760 unsigned CurrElt = 0;
1761 unsigned MaxElts = VecVT.getVectorNumElements();
1762 unsigned PrevElt = 0;
1763 unsigned V0Elt = 0;
1764 bool monotonic = true;
1765 bool rotate = true;
1767 if (EltVT == MVT::i8) {
1768 V2EltIdx0 = 16;
1769 } else if (EltVT == MVT::i16) {
1770 V2EltIdx0 = 8;
1771 } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
1772 V2EltIdx0 = 4;
1773 } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
1774 V2EltIdx0 = 2;
1775 } else
1776 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1778 for (unsigned i = 0; i != PermMask.getNumOperands(); ++i) {
1779 if (PermMask.getOperand(i).getOpcode() != ISD::UNDEF) {
1780 unsigned SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1782 if (monotonic) {
1783 if (SrcElt >= V2EltIdx0) {
1784 if (1 >= (++EltsFromV2)) {
1785 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1787 } else if (CurrElt != SrcElt) {
1788 monotonic = false;
1789 }
1791 ++CurrElt;
1794 if (rotate) {
1795 if (PrevElt > 0 && SrcElt < MaxElts) {
1796 if ((PrevElt == SrcElt - 1)
1797 || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
1798 PrevElt = SrcElt;
1799 if (SrcElt == 0) V0Elt = i;
1804 } else if (PrevElt == 0) {
1805 // First time through, need to keep track of the previous element
1806 PrevElt = SrcElt;
1807 } else {
1808 // This isn't a rotation; it takes elements from vector 2
1809 rotate = false;
1815 if (EltsFromV2 == 1 && monotonic) {
1816 // Compute mask and shuffle
1817 MachineFunction &MF = DAG.getMachineFunction();
1818 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1819 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1820 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1821 // Initialize temporary register to 0
1822 SDValue InitTempReg =
1823 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1824 // Copy register's contents as index in SHUFFLE_MASK:
1825 SDValue ShufMaskOp =
1826 DAG.getNode(SPUISD::SHUFFLE_MASK, MVT::v4i32,
1827 DAG.getTargetConstant(V2Elt, MVT::i32),
1828 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1829 // Use shuffle mask in SHUFB synthetic instruction:
1830 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1831 } else if (rotate) {
1832 int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
1834 return DAG.getNode(SPUISD::ROTBYTES_LEFT, V1.getValueType(),
1835 V1, DAG.getConstant(rotamt, MVT::i16));
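// Example: for v4i32 a mask of (1, 2, 3, 0) is a pure rotation; V0Elt is the
// mask position holding element 0 (here 3), giving
// rotamt = (4 - 3) * 4 = 4 bytes, i.e. a one-element left rotate.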
1837 // Convert the SHUFFLE_VECTOR mask's input element units to the
1838 // actual bytes.
1839 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1841 SmallVector<SDValue, 16> ResultMask;
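// e.g. a v4i32 element mask (1, 0, 3, 2) expands to the byte mask
// 4..7, 0..3, 12..15, 8..11 that shufb consumes.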
1842 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1843 unsigned SrcElt;
1844 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1845 SrcElt = 0;
1846 else
1847 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1849 for (unsigned j = 0; j < BytesPerElement; ++j) {
1850 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1851 MVT::i8));
1855 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1856 &ResultMask[0], ResultMask.size());
1857 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
1861 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1862 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1864 if (Op0.getNode()->getOpcode() == ISD::Constant) {
1865 // For a constant, build the appropriate constant vector, which will
1866 // eventually simplify to a vector register load.
1868 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1869 SmallVector<SDValue, 16> ConstVecValues;
1870 MVT VT;
1871 size_t n_copies;
1873 // Create a constant vector:
1874 switch (Op.getValueType().getSimpleVT()) {
1875 default: assert(0 && "Unexpected constant value type in "
1876 "LowerSCALAR_TO_VECTOR");
1877 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1878 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1879 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1880 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1881 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1882 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1885 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1886 for (size_t j = 0; j < n_copies; ++j)
1887 ConstVecValues.push_back(CValue);
1889 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1890 &ConstVecValues[0], ConstVecValues.size());
1892 // Otherwise, copy the value from one register to another:
1893 switch (Op0.getValueType().getSimpleVT()) {
1894 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1895 case MVT::i8: case MVT::i16: case MVT::i32:
1898 case MVT::i64: case MVT::f32: case MVT::f64:
1901 return DAG.getNode(SPUISD::PREFSLOT2VEC, Op.getValueType(), Op0, Op0);
1908 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
1909 MVT VT = Op.getValueType();
1910 SDValue N = Op.getOperand(0);
1911 SDValue Elt = Op.getOperand(1);
1912 SDValue retval;
1914 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
1915 // Constant argument:
1916 int EltNo = (int) C->getZExtValue();
1919 if (VT == MVT::i8 && EltNo >= 16)
1920 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
1921 else if (VT == MVT::i16 && EltNo >= 8)
1922 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
1923 else if (VT == MVT::i32 && EltNo >= 4)
1924 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
1925 else if (VT == MVT::i64 && EltNo >= 2)
1926 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");
1928 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
1929 // i32 and i64: Element 0 is the preferred slot
1930 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, N);
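// The "preferred slot" is the fixed position in a 16-byte SPU register where
// a scalar lives: bytes 0-3 for a 32-bit value, bytes 0-7 for a 64-bit one.
// Element 0 already sits there, so no shuffle is needed.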
1933 // Need to generate shuffle mask and extract:
1934 int prefslot_begin = -1, prefslot_end = -1;
1935 int elt_byte = EltNo * VT.getSizeInBits() / 8;
1937 switch (VT.getSimpleVT()) {
1938 default:
1939 assert(false && "Invalid value type!");
1940 case MVT::i8:
1941 prefslot_begin = prefslot_end = 3; break;
1944 case MVT::i16:
1945 prefslot_begin = 2; prefslot_end = 3; break;
1948 case MVT::i32: case MVT::f32:
1950 prefslot_begin = 0; prefslot_end = 3; break;
1953 case MVT::i64: case MVT::f64:
1955 prefslot_begin = 0; prefslot_end = 7; break;
1960 assert(prefslot_begin != -1 && prefslot_end != -1 &&
1961 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
1963 unsigned int ShufBytes[16];
1964 for (int i = 0; i < 16; ++i) {
1965 // zero fill upper part of preferred slot, don't care about the
1966 // rest (undefined anyway)
1967 unsigned int mask_val;
1968 if (i <= prefslot_end) {
1969 mask_val =
1970 ((i < prefslot_begin)
1971 ? 0x80
1972 : elt_byte + (i - prefslot_begin));
1974 ShufBytes[i] = mask_val;
1975 } else
1976 ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
1979 SDValue ShufMask[4];
1980 for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
1981 unsigned bidx = i * 4;
1982 unsigned int bits = ((ShufBytes[bidx] << 24) |
1983 (ShufBytes[bidx+1] << 16) |
1984 (ShufBytes[bidx+2] << 8) |
1985 ShufBytes[bidx+3]);
1986 ShufMask[i] = DAG.getConstant(bits, MVT::i32);
1989 SDValue ShufMaskVec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1990 &ShufMask[0],
1991 sizeof(ShufMask) / sizeof(ShufMask[0]));
1993 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
1994 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
1995 N, N, ShufMaskVec));
1997 // Variable index: Rotate the requested element into slot 0, then replicate
1998 // slot 0 across the vector
1999 MVT VecVT = N.getValueType();
2000 if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
2001 cerr << "LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit vector type!\n";
2005 // Make life easier by making sure the index is zero-extended to i32
2006 if (Elt.getValueType() != MVT::i32)
2007 Elt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Elt);
2009 // Scale the index to a bit/byte shift quantity
2010 APInt scaleFactor =
2011 APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
2012 unsigned scaleShift = scaleFactor.logBase2();
2013 SDValue vecShift;
2015 if (scaleShift > 0) {
2016 // Scale the shift factor:
2017 Elt = DAG.getNode(ISD::SHL, MVT::i32, Elt,
2018 DAG.getConstant(scaleShift, MVT::i32));
2021 vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT, N, Elt);
2023 // Replicate the bytes starting at byte 0 across the entire vector (for
2024 // consistency with the notion of a unified register set)
2026 SDValue replicate;
2027 switch (VT.getSimpleVT()) {
2028 default: cerr << "LowerEXTRACT_VECTOR_ELT(variable): Unhandled vector type\n";
2030 abort();
2032 case MVT::i8: {
2033 SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
2034 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2035 factor, factor);
2036 break; }
2038 case MVT::i16: {
2039 SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2040 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2041 factor, factor);
2042 break; }
2045 case MVT::i32: {
2046 SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2047 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2048 factor, factor);
2049 break; }
2052 case MVT::i64: {
2053 SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2054 SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2055 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, loFactor, hiFactor,
2056 loFactor, hiFactor);
2057 break; }
2061 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2062 DAG.getNode(SPUISD::SHUFB, VecVT,
2063 vecShift, vecShift, replicate));
2064 }
2066 return retval;
2069 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2070 SDValue VecOp = Op.getOperand(0);
2071 SDValue ValOp = Op.getOperand(1);
2072 SDValue IdxOp = Op.getOperand(2);
2073 MVT VT = Op.getValueType();
2075 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2076 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2078 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2079 // Use $sp ($1) because it's always 16-byte aligned and it's available:
2080 SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
2081 DAG.getRegister(SPU::R1, PtrVT),
2082 DAG.getConstant(CN->getSExtValue(), PtrVT));
2083 SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, VT, Pointer);
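// The C*D (generate-controls-for-insertion) instructions derive a SHUFB mask
// from the low bits of an address; since $1 is always 16-byte aligned, adding
// the constant index gives an address whose low nibble steers the generated
// insertion mask to the right spot in the quadword.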
2085 SDValue result =
2086 DAG.getNode(SPUISD::SHUFB, VT,
2087 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2088 VecOp,
2089 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, ShufMask));
2091 return result;
2094 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
2095 const TargetLowering &TLI)
2097 SDValue N0 = Op.getOperand(0); // Everything has at least one operand
2098 MVT ShiftVT = TLI.getShiftAmountTy();
2100 assert(Op.getValueType() == MVT::i8);
2101 switch (Opc) {
2102 default:
2103 assert(0 && "Unhandled i8 math operator");
2106 case ISD::ADD: {
2107 // 8-bit addition: Promote the arguments up to 16 bits and truncate the result.
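// e.g. (add i8 a, b) becomes (trunc (add i16 (sext a), (sext b))): the SPU
// has no byte-wide add, so 16 bits is the narrowest practical width.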
2109 SDValue N1 = Op.getOperand(1);
2110 N0 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0);
2111 N1 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1);
2112 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2113 DAG.getNode(Opc, MVT::i16, N0, N1));
2117 case ISD::SUB: {
2118 // 8-bit subtraction: Promote the arguments up to 16 bits and truncate the result.
2120 SDValue N1 = Op.getOperand(1);
2121 N0 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0);
2122 N1 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1);
2123 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2124 DAG.getNode(Opc, MVT::i16, N0, N1));
2126 case ISD::ROTR:
2127 case ISD::ROTL: {
2128 SDValue N1 = Op.getOperand(1);
2129 unsigned N1Opc;
2130 N0 = (N0.getOpcode() != ISD::Constant
2131 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2132 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2133 MVT::i16));
2134 N1Opc = N1.getValueType().bitsLT(ShiftVT)
2135 ? ISD::ZERO_EXTEND : ISD::TRUNCATE;
2137 N1 = (N1.getOpcode() != ISD::Constant
2138 ? DAG.getNode(N1Opc, ShiftVT, N1)
2139 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2140 TLI.getShiftAmountTy()));
2141 SDValue ExpandArg =
2142 DAG.getNode(ISD::OR, MVT::i16, N0,
2143 DAG.getNode(ISD::SHL, MVT::i16,
2144 N0, DAG.getConstant(8, MVT::i32)));
2145 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2146 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
2148 case ISD::SRL:
2149 case ISD::SHL: {
2150 SDValue N1 = Op.getOperand(1);
2152 N0 = (N0.getOpcode() != ISD::Constant
2153 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2154 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2155 MVT::i16));
2156 N1Opc = N1.getValueType().bitsLT(ShiftVT)
2157 ? ISD::ZERO_EXTEND : ISD::TRUNCATE;
2159 N1 = (N1.getOpcode() != ISD::Constant
2160 ? DAG.getNode(N1Opc, ShiftVT, N1)
2161 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(), ShiftVT));
2162 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2163 DAG.getNode(Opc, MVT::i16, N0, N1));
2165 case ISD::SRA: {
2166 SDValue N1 = Op.getOperand(1);
2168 N0 = (N0.getOpcode() != ISD::Constant
2169 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2170 : DAG.getConstant(cast<ConstantSDNode>(N0)->getSExtValue(),
2171 MVT::i16));
2172 N1Opc = N1.getValueType().bitsLT(ShiftVT)
2173 ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2175 N1 = (N1.getOpcode() != ISD::Constant
2176 ? DAG.getNode(N1Opc, ShiftVT, N1)
2177 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2178 ShiftVT));
2179 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2180 DAG.getNode(Opc, MVT::i16, N0, N1));
2182 case ISD::MUL: {
2183 SDValue N1 = Op.getOperand(1);
2184 unsigned N1Opc;
2185 N0 = (N0.getOpcode() != ISD::Constant
2186 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2187 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2188 MVT::i16));
2189 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2190 N1 = (N1.getOpcode() != ISD::Constant
2191 ? DAG.getNode(N1Opc, MVT::i16, N1)
2192 : DAG.getConstant(cast<ConstantSDNode>(N1)->getSExtValue(),
2193 MVT::i16));
2194 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2195 DAG.getNode(Opc, MVT::i16, N0, N1));
2203 static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2205 MVT VT = Op.getValueType();
2206 MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2208 SDValue Op0 = Op.getOperand(0);
2210 switch (Opc) {
2211 case ISD::ZERO_EXTEND:
2212 case ISD::ANY_EXTEND: {
2213 MVT Op0VT = Op0.getValueType();
2214 MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2216 assert(Op0VT == MVT::i32
2217 && "CellSPU: Zero/sign extending something other than i32");
2219 DEBUG(cerr << "CellSPU.LowerI64Math: lowering zero/sign/any extend\n");
2221 SDValue PromoteScalar =
2222 DAG.getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0);
2224 // Use a shuffle to zero extend the i32 to i64 directly:
2225 SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, Op0VecVT,
2226 DAG.getConstant(0x80808080, MVT::i32), DAG.getConstant(0x00010203,
2227 MVT::i32), DAG.getConstant(0x80808080, MVT::i32), DAG.getConstant(
2228 0x08090a0b, MVT::i32));
2229 SDValue zextShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT, PromoteScalar,
2230 PromoteScalar, shufMask);
2232 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, DAG.getNode(ISD::BIT_CONVERT,
2233 VecVT, zextShuffle));
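// Byte-level view (big-endian): the mask words read {zeros, A.word0, zeros,
// A.word2}, so the i32 sitting in A's preferred slot becomes the low word of
// a zero-extended i64 in slot 0; the second doubleword is a don't-care.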
2236 case ISD::ADD: {
2237 // Turn operands into vectors to satisfy type checking (shufb works on vectors)
2239 SDValue Op0 =
2240 DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(0));
2241 SDValue Op1 =
2242 DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(1));
2243 SmallVector<SDValue, 16> ShufBytes;
2245 // Create the shuffle mask for "rotating" the carry up one register slot
2246 // once the carry is generated.
2247 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2248 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2249 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2250 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
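// On the (big-endian) SPU, word 0 is the high half and word 1 the low half
// of each i64; this mask moves word 1's carry into word 0 (and word 3's into
// word 2) while the 0x80 control bytes zero the rest, so ADD_EXTENDED can
// fold the low-half carry into the high half.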
2252 SDValue CarryGen =
2253 DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
2254 SDValue ShiftedCarry =
2255 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2256 CarryGen, CarryGen,
2257 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2258 &ShufBytes[0], ShufBytes.size()));
2260 return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
2261 DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
2262 Op0, Op1, ShiftedCarry));
2265 case ISD::SUB: {
2266 // Turn operands into vectors to satisfy type checking (shufb works on vectors)
2268 SDValue Op0 =
2269 DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(0));
2270 SDValue Op1 =
2271 DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(1));
2272 SmallVector<SDValue, 16> ShufBytes;
2274 // Create the shuffle mask for "rotating" the borrow up one register slot
2275 // once the borrow is generated.
2276 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2277 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2278 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2279 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2281 SDValue BorrowGen =
2282 DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
2283 SDValue ShiftedBorrow =
2284 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2285 BorrowGen, BorrowGen,
2286 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2287 &ShufBytes[0], ShufBytes.size()));
2289 return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
2290 DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
2291 Op0, Op1, ShiftedBorrow));
2298 //! Lower byte immediate operations for v16i8 vectors:
2299 static SDValue
2300 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2301 SDValue ConstVec, Arg;
2303 MVT VT = Op.getValueType();
2305 ConstVec = Op.getOperand(0);
2306 Arg = Op.getOperand(1);
2307 if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2308 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2309 ConstVec = ConstVec.getOperand(0);
2310 } else {
2311 ConstVec = Op.getOperand(1);
2312 Arg = Op.getOperand(0);
2313 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2314 ConstVec = ConstVec.getOperand(0);
2319 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2320 uint64_t VectorBits[2];
2321 uint64_t UndefBits[2];
2322 uint64_t SplatBits, SplatUndef;
2323 int SplatSize;
2325 if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
2326 && isConstantSplat(VectorBits, UndefBits,
2327 VT.getVectorElementType().getSizeInBits(),
2328 SplatBits, SplatUndef, SplatSize)) {
2329 SDValue tcVec[16];
2330 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2331 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2333 // Turn the BUILD_VECTOR into a set of target constants:
2334 for (size_t i = 0; i < tcVecSize; ++i)
2335 tcVec[i] = tc;
2337 return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
2338 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2341 // These operations (AND, OR, XOR) are legal, they just couldn't be custom
2342 // lowered. Return the operation, rather than a null SDValue.
2343 return Op;
2346 //! Custom lowering for CTPOP (count population)
2348 Custom lowering code that counts the number of ones in the input
2349 operand. SPU has such an instruction, but it counts the number of
2350 ones per byte, which then have to be accumulated.
2352 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2353 MVT VT = Op.getValueType();
2354 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
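// Sketch of the accumulation for i32: CNTB leaves a count in each byte
// (b3,b2,b1,b0); two shift-and-add rounds fold them together and the final
// AND with 0xff extracts the total, which is at most 32.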
2356 switch (VT.getSimpleVT()) {
2357 default:
2358 assert(false && "Invalid value type!");
2359 case MVT::i8: {
2360 SDValue N = Op.getOperand(0);
2361 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2363 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
2364 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2366 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
2369 case MVT::i16: {
2370 MachineFunction &MF = DAG.getMachineFunction();
2371 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2373 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2375 SDValue N = Op.getOperand(0);
2376 SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2377 SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2378 SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2380 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
2381 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2383 // CNTB_result becomes the chain to which all of the virtual registers
2384 // CNTB_reg, SUM1_reg become associated:
2385 SDValue CNTB_result =
2386 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2388 SDValue CNTB_rescopy =
2389 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2391 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2393 return DAG.getNode(ISD::AND, MVT::i16,
2394 DAG.getNode(ISD::ADD, MVT::i16,
2395 DAG.getNode(ISD::SRL, MVT::i16,
2396 Tmp1, Shift1),
2397 Tmp1),
2398 Mask0);
2401 case MVT::i32: {
2402 MachineFunction &MF = DAG.getMachineFunction();
2403 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2405 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2406 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2408 SDValue N = Op.getOperand(0);
2409 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2410 SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2411 SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2412 SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2414 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
2415 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2417 // CNTB_result becomes the chain to which all of the virtual registers
2418 // CNTB_reg, SUM1_reg become associated:
2419 SDValue CNTB_result =
2420 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2422 SDValue CNTB_rescopy =
2423 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2425 SDValue Comp1 =
2426 DAG.getNode(ISD::SRL, MVT::i32,
2427 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2429 SDValue Sum1 =
2430 DAG.getNode(ISD::ADD, MVT::i32,
2431 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2433 SDValue Sum1_rescopy =
2434 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2436 SDValue Comp2 =
2437 DAG.getNode(ISD::SRL, MVT::i32,
2438 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2439 Shift2);
2440 SDValue Sum2 =
2441 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2442 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2444 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2454 //! Lower ISD::SETCC
2456 Lowers i64 condition-code handling; the SPU natively supports only eq, ugt and sgt.
2459 static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) {
2460 MVT VT = Op.getValueType();
2461 SDValue lhs = Op.getOperand(0);
2462 SDValue rhs = Op.getOperand(1);
2463 SDValue condition = Op.getOperand(2);
2465 if (VT == MVT::i32 && lhs.getValueType() == MVT::i64) {
2466 // Expand the i64 comparisons to what Cell can actually support,
2467 // which is eq, ugt and sgt:
2469 CondCodeSDNode *ccvalue = dyn_cast<CondCodeSDNode>(condition);
2471 switch (ccvalue->get()) {
2480 //! Lower ISD::SELECT_CC
2482 ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
2483 SELB instruction.
2485 \note Need to revisit this in the future: if the code path through the true
2486 and false value computations is longer than the latency of a branch (6
2487 cycles), then it would be more advantageous to insert a new basic block and
2488 branch on the condition. However, this code does not make that assumption,
2489 given the simplistic uses so far.
2492 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
2493 const TargetLowering &TLI) {
2494 MVT VT = Op.getValueType();
2495 SDValue lhs = Op.getOperand(0);
2496 SDValue rhs = Op.getOperand(1);
2497 SDValue trueval = Op.getOperand(2);
2498 SDValue falseval = Op.getOperand(3);
2499 SDValue condition = Op.getOperand(4);
2501 // NOTE: SELB's arguments: $rA, $rB, $mask
2503 // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
2504 // where bits in $mask are 1. CCond will be inverted, having 1s where the
2505 // condition was true and 0s where the condition was false. Hence, the
2506 // arguments to SELB get reversed.
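// In bitwise terms: selb $rt, $rA, $rB, $mask computes
//   $rt = ($rA & ~$mask) | ($rB & $mask)
// which is why passing (falseval, trueval) below compensates for the
// inverted condition mask.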
2508 // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2509 // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2510 // with another "cannot select select_cc" assert:
2512 SDValue compare = DAG.getNode(ISD::SETCC,
2513 TLI.getSetCCResultType(Op.getValueType()),
2514 lhs, rhs, condition);
2515 return DAG.getNode(SPUISD::SELB, VT, falseval, trueval, compare);
2518 //! Custom lower ISD::TRUNCATE
2519 static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
2521 MVT VT = Op.getValueType();
2522 MVT::SimpleValueType simpleVT = VT.getSimpleVT();
2523 MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2525 SDValue Op0 = Op.getOperand(0);
2526 MVT Op0VT = Op0.getValueType();
2527 MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2529 // Create shuffle mask
2530 if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
2531 // least significant doubleword of quadword
2532 unsigned maskHigh = 0x08090a0b;
2533 unsigned maskLow = 0x0c0d0e0f;
2534 // Use a shuffle to perform the truncation
2535 SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2536 DAG.getConstant(maskHigh, MVT::i32),
2537 DAG.getConstant(maskLow, MVT::i32),
2538 DAG.getConstant(maskHigh, MVT::i32),
2539 DAG.getConstant(maskLow, MVT::i32));
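// Byte-level view: 0x08090a0b / 0x0c0d0e0f select bytes 8-15, the least
// significant doubleword of the big-endian i128, and place them in the i64
// preferred slot (the mask repeats to fill the quadword).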
2542 SDValue PromoteScalar = DAG.getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0);
2544 SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT,
2545 PromoteScalar, PromoteScalar, shufMask);
2547 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2548 DAG.getNode(ISD::BIT_CONVERT, VecVT, truncShuffle));
2551 return SDValue(); // Leave the truncate unmolested
2554 //! Custom (target-specific) lowering entry point
2556 This is where LLVM's DAG selection process calls to do target-specific
2557 lowering of nodes.
2560 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2562 unsigned Opc = (unsigned) Op.getOpcode();
2563 MVT VT = Op.getValueType();
2565 switch (Opc) {
2566 default: {
2567 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2568 cerr << "Op.getOpcode() = " << Opc << "\n";
2569 cerr << "*Op.getNode():\n";
2570 Op.getNode()->dump();
2571 abort();
2572 }
2576 case ISD::LOAD:
2577 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2578 case ISD::STORE:
2579 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2580 case ISD::ConstantPool:
2581 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2582 case ISD::GlobalAddress:
2583 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2584 case ISD::JumpTable:
2585 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2586 case ISD::Constant:
2587 return LowerConstant(Op, DAG);
2588 case ISD::ConstantFP:
2589 return LowerConstantFP(Op, DAG);
2590 case ISD::BRCOND:
2591 return LowerBRCOND(Op, DAG, *this);
2592 case ISD::FORMAL_ARGUMENTS:
2593 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2594 case ISD::CALL:
2595 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2596 case ISD::RET:
2597 return LowerRET(Op, DAG, getTargetMachine());
2600 case ISD::ZERO_EXTEND:
2601 case ISD::ANY_EXTEND:
2602 return LowerI64Math(Op, DAG, Opc);
2604 // i8, i64 math ops:
2605 case ISD::ADD: case ISD::SUB:
2607 case ISD::ROTR: case ISD::ROTL:
2609 case ISD::SRL: case ISD::SHL: case ISD::SRA:
2612 if (VT == MVT::i8)
2613 return LowerI8Math(Op, DAG, Opc, *this);
2614 else if (VT == MVT::i64)
2615 return LowerI64Math(Op, DAG, Opc);
2619 // Vector-related lowering.
2620 case ISD::BUILD_VECTOR:
2621 return LowerBUILD_VECTOR(Op, DAG);
2622 case ISD::SCALAR_TO_VECTOR:
2623 return LowerSCALAR_TO_VECTOR(Op, DAG);
2624 case ISD::VECTOR_SHUFFLE:
2625 return LowerVECTOR_SHUFFLE(Op, DAG);
2626 case ISD::EXTRACT_VECTOR_ELT:
2627 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2628 case ISD::INSERT_VECTOR_ELT:
2629 return LowerINSERT_VECTOR_ELT(Op, DAG);
2631 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2632 case ISD::AND: case ISD::OR: case ISD::XOR:
2635 return LowerByteImmed(Op, DAG);
2637 // Vector and i8 multiply:
2638 case ISD::MUL:
2639 if (VT == MVT::i8)
2640 return LowerI8Math(Op, DAG, Opc, *this);
2642 case ISD::CTPOP:
2643 return LowerCTPOP(Op, DAG);
2645 case ISD::SELECT_CC:
2646 return LowerSELECT_CC(Op, DAG, *this);
2648 case ISD::TRUNCATE:
2649 return LowerTRUNCATE(Op, DAG);
2651 case ISD::SETCC:
2652 return LowerSETCC(Op, DAG);
2658 void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
2659 SmallVectorImpl<SDValue> &Results,
2660 SelectionDAG &DAG) {
2663 unsigned Opc = (unsigned) N->getOpcode();
2664 MVT OpVT = N->getValueType(0);
2666 switch (Opc) {
2667 default: {
2668 cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2669 cerr << "Op.getOpcode() = " << Opc << "\n";
2670 cerr << "*Op.getNode():\n";
2671 N->dump();
2672 abort();
2678 /* Otherwise, return unchanged */
2681 //===----------------------------------------------------------------------===//
2682 // Target Optimization Hooks
2683 //===----------------------------------------------------------------------===//
2685 SDValue
2686 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2689 TargetMachine &TM = getTargetMachine();
2691 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2692 SelectionDAG &DAG = DCI.DAG;
2693 SDValue Op0 = N->getOperand(0); // everything has at least one operand
2694 MVT NodeVT = N->getValueType(0); // The node's value type
2695 MVT Op0VT = Op0.getValueType(); // The first operand's result
2696 SDValue Result; // Initially, empty result
2698 switch (N->getOpcode()) {
2699 default: break;
2700 case ISD::ADD: {
2701 SDValue Op1 = N->getOperand(1);
2703 if (Op0.getOpcode() == SPUISD::IndirectAddr
2704 || Op1.getOpcode() == SPUISD::IndirectAddr) {
2705 // Normalize the operands to reduce repeated code
2706 SDValue IndirectArg = Op0, AddArg = Op1;
2708 if (Op1.getOpcode() == SPUISD::IndirectAddr) {
2709 IndirectArg = Op1; AddArg = Op0;
2710 }
2713 if (isa<ConstantSDNode>(AddArg)) {
2714 ConstantSDNode *CN0 = cast<ConstantSDNode>(AddArg);
2715 SDValue IndOp1 = IndirectArg.getOperand(1);
2717 if (CN0->isNullValue()) {
2718 // (add (SPUindirect <arg>, <arg>), 0) ->
2719 // (SPUindirect <arg>, <arg>)
2721 #if !defined(NDEBUG)
2722 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2723 cerr << "\n"
2724 << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
2725 << "With:    (SPUindirect <arg>, <arg>)\n";
2726 }
2727 #endif
2729 return IndirectArg;
2730 } else if (isa<ConstantSDNode>(IndOp1)) {
2731 // (add (SPUindirect <arg>, <const>), <const>) ->
2732 // (SPUindirect <arg>, <const + const>)
2733 ConstantSDNode *CN1 = cast<ConstantSDNode>(IndOp1);
2734 int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
2735 SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
2737 #if !defined(NDEBUG)
2738 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2739 cerr << "\n"
2740 << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
2741 << "), " << CN0->getSExtValue() << ")\n"
2742 << "With:    (SPUindirect <arg>, "
2743 << combinedConst << ")\n";
2744 }
2745 #endif
2747 return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
2748 IndirectArg, combinedValue);
2754 case ISD::SIGN_EXTEND:
2755 case ISD::ZERO_EXTEND:
2756 case ISD::ANY_EXTEND: {
2757 if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
2758 // (any_extend (SPUextract_elt0 <arg>)) ->
2759 // (SPUextract_elt0 <arg>)
2760 // Types must match, however...
2761 #if !defined(NDEBUG)
2762 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2763 cerr << "\nReplace: ";
2764 N->dump(&DAG);
2765 cerr << "\nWith:    ";
2766 Op0.getNode()->dump(&DAG);
2775 case SPUISD::IndirectAddr: {
2776 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2777 ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
2778 if (CN->getZExtValue() == 0) {
2779 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2780 // (SPUaform <addr>, 0)
2782 DEBUG(cerr << "Replace: ");
2783 DEBUG(N->dump(&DAG));
2784 DEBUG(cerr << "\nWith: ");
2785 DEBUG(Op0.getNode()->dump(&DAG));
2786 DEBUG(cerr << "\n");
2788 return Op0;
2790 } else if (Op0.getOpcode() == ISD::ADD) {
2791 SDValue Op1 = N->getOperand(1);
2792 if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
2793 // (SPUindirect (add <arg>, <arg>), 0) ->
2794 // (SPUindirect <arg>, <arg>)
2795 if (CN1->isNullValue()) {
2797 #if !defined(NDEBUG)
2798 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2799 cerr << "\n"
2800 << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
2801 << "With:    (SPUindirect <arg>, <arg>)\n";
2802 }
2803 #endif
2805 return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
2806 Op0.getOperand(0), Op0.getOperand(1));
2812 case SPUISD::SHLQUAD_L_BITS:
2813 case SPUISD::SHLQUAD_L_BYTES:
2814 case SPUISD::VEC_SHL:
2815 case SPUISD::VEC_SRL:
2816 case SPUISD::VEC_SRA:
2817 case SPUISD::ROTBYTES_LEFT: {
2818 SDValue Op1 = N->getOperand(1);
2820 // Kill degenerate vector shifts:
2821 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
2822 if (CN->isNullValue()) {
2823 Result = Op0;
2824 }
2828 case SPUISD::PREFSLOT2VEC: {
2829 switch (Op0.getOpcode()) {
2832 case ISD::ANY_EXTEND:
2833 case ISD::ZERO_EXTEND:
2834 case ISD::SIGN_EXTEND: {
2835 // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
2836 // (SPUprefslot2vec <arg>)
2837 // but only if the SPUprefslot2vec and <arg> types match.
2838 SDValue Op00 = Op0.getOperand(0);
2839 if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
2840 SDValue Op000 = Op00.getOperand(0);
2841 if (Op000.getValueType() == NodeVT) {
2842 Result = Op00;
2843 }
2844 }
2847 case SPUISD::VEC2PREFSLOT: {
2848 // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
2849 // <arg>
2850 Result = Op0.getOperand(0);
2857 // Otherwise, return unchanged.
2859 if (Result.getNode()) {
2860 DEBUG(cerr << "\nReplace.SPU: ");
2861 DEBUG(N->dump(&DAG));
2862 DEBUG(cerr << "\nWith: ");
2863 DEBUG(Result.getNode()->dump(&DAG));
2864 DEBUG(cerr << "\n");
2865 }
2867 return Result;
2871 //===----------------------------------------------------------------------===//
2872 // Inline Assembly Support
2873 //===----------------------------------------------------------------------===//
2875 /// getConstraintType - Given a constraint letter, return the type of
2876 /// constraint it is for this target.
2877 SPUTargetLowering::ConstraintType
2878 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2879 if (ConstraintLetter.size() == 1) {
2880 switch (ConstraintLetter[0]) {
2881 default: break;
2882 case 'b': case 'r': case 'f': case 'v': case 'y':
2887 return C_RegisterClass;
2890 return TargetLowering::getConstraintType(ConstraintLetter);
2893 std::pair<unsigned, const TargetRegisterClass*>
2894 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2895 MVT VT) const {
2897 if (Constraint.size() == 1) {
2898 // GCC-style constraint letters (list inherited from the RS6000 backend)
2899 switch (Constraint[0]) {
2900 case 'b': case 'r':   // address and general-purpose registers
2902 if (VT == MVT::i64)
2903 return std::make_pair(0U, SPU::R64CRegisterClass);
2904 return std::make_pair(0U, SPU::R32CRegisterClass);
2905 case 'f':
2906 if (VT == MVT::f32)
2907 return std::make_pair(0U, SPU::R32FPRegisterClass);
2908 else if (VT == MVT::f64)
2909 return std::make_pair(0U, SPU::R64FPRegisterClass);
2911 case 'v':
2912 return std::make_pair(0U, SPU::GPRCRegisterClass);
2916 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2919 //! Compute used/known bits for a SPU operand
2921 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
2922 const APInt &Mask,
2923 APInt &KnownZero, APInt &KnownOne,
2925 const SelectionDAG &DAG,
2926 unsigned Depth ) const {
2928 const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
2931 switch (Op.getOpcode()) {
2932 default:
2933 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
2934 break;
2943 case SPUISD::PREFSLOT2VEC: {
2944 SDValue Op0 = Op.getOperand(0);
2945 MVT Op0VT = Op0.getValueType();
2946 unsigned Op0VTBits = Op0VT.getSizeInBits();
2947 uint64_t InMask = Op0VT.getIntegerVTBitMask();
2948 KnownZero |= APInt(Op0VTBits, ~InMask, false);
2949 KnownOne |= APInt(Op0VTBits, InMask, false);
2950 break;
2953 case SPUISD::LDRESULT:
2954 case SPUISD::VEC2PREFSLOT: {
2955 MVT OpVT = Op.getValueType();
2956 unsigned OpVTBits = OpVT.getSizeInBits();
2957 uint64_t InMask = OpVT.getIntegerVTBitMask();
2958 KnownZero |= APInt(OpVTBits, ~InMask, false);
2959 KnownOne |= APInt(OpVTBits, InMask, false);
2960 break;
2964 case SPUISD::SHLQUAD_L_BITS:
2965 case SPUISD::SHLQUAD_L_BYTES:
2966 case SPUISD::VEC_SHL:
2967 case SPUISD::VEC_SRL:
2968 case SPUISD::VEC_SRA:
2969 case SPUISD::VEC_ROTL:
2970 case SPUISD::VEC_ROTR:
2971 case SPUISD::ROTBYTES_LEFT:
2972 case SPUISD::SELECT_MASK:
2974 case SPUISD::SEXT32TO64:
2979 unsigned
2980 SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
2981 unsigned Depth) const {
2982 switch (Op.getOpcode()) {
2983 default: return 1;
2985 case SPUISD::LDRESULT: case SPUISD::VEC2PREFSLOT: {
2987 MVT VT = Op.getValueType();
2989 if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
2990 VT = MVT::i32;
2991 }
2992 return VT.getSizeInBits();
2997 // LowerAsmOperandForConstraint
2998 void
2999 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3000 char ConstraintLetter,
3001 bool hasMemory,
3002 std::vector<SDValue> &Ops,
3003 SelectionDAG &DAG) const {
3004 // Default, for the time being, to the base class handler
3005 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
3006 Ops, DAG);
3009 /// isLegalAddressImmediate - Return true if the integer value can be used
3010 /// as the offset of the target addressing mode.
3011 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3012 const Type *Ty) const {
3013 // SPU's local store is 256K (2^18 bytes), so offsets must fit in 18 bits:
3014 return (V > -(1 << 18) && V < (1 << 18) - 1);
3017 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3018 return false;
3021 bool
3022 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3023 // The SPU target isn't yet aware of offsets.
3024 return false;