1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the SPUTargetLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "SPURegisterNames.h"
15 #include "SPUISelLowering.h"
16 #include "SPUTargetMachine.h"
17 #include "SPUFrameInfo.h"
18 #include "llvm/ADT/VectorExtras.h"
19 #include "llvm/CodeGen/CallingConvLower.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/SelectionDAG.h"
25 #include "llvm/Constants.h"
26 #include "llvm/Function.h"
27 #include "llvm/Intrinsics.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/MathExtras.h"
30 #include "llvm/Target/TargetOptions.h"
31 #include "llvm/CodeGen/SchedulerRegistry.h"
37 // Used in getTargetNodeName() below
// Lazily-populated table mapping SPUISD opcodes to printable names; filled on
// the first call to getTargetNodeName().  NOTE(review): file-scope mutable
// map with unsynchronized lazy init -- assumes single-threaded use; confirm.
39 std::map<unsigned, const char *> node_names;
41 //! MVT mapping to useful data for Cell SPU
// Each entry associates a simple value type with the byte offset of its
// "preferred slot" within a 16-byte SPU register (prefslot_byte).
// NOTE(review): the initializer entries of valtype_map (and the remaining
// struct fields, e.g. the 'valtype' member read by getValueTypeMapEntry) are
// on lines elided from this view -- confirm against the full file.
42 struct valtype_map_s {
44   const int prefslot_byte;
47 const valtype_map_s valtype_map[] = {
// Number of entries in valtype_map, computed from the array's static size.
58 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
// Linear search of valtype_map for VT.  Returns a pointer to the matching
// entry, or NULL (0) with a diagnostic when VT has no mapping; callers must
// handle the NULL return.
60 const valtype_map_s *getValueTypeMapEntry(MVT VT) {
61   const valtype_map_s *retval = 0;
63   for (size_t i = 0; i < n_valtype_map; ++i) {
64     if (valtype_map[i].valtype == VT) {
65       retval = valtype_map + i;
// Not-found diagnostic; the remainder of the message (and the early-loop
// break, if any) is on lines elided from this view.
72     cerr << "getValueTypeMapEntry returns NULL for "
82 //! Predicate that returns true if operand is a memory target
84   \arg Op Operand to test
85   \return true if the operand is a memory target (i.e., global
86   address, external symbol, constant pool) or an A-form
// Recognizes both the generic and the Target* (post-selection) forms of each
// address node, plus the SPU-specific A-form address wrapper.
89 bool isMemoryOperand(const SDValue &Op)
91   const unsigned Opc = Op.getOpcode();
92   return (Opc == ISD::GlobalAddress
93           || Opc == ISD::GlobalTLSAddress
94           || Opc == ISD::JumpTable
95           || Opc == ISD::ConstantPool
96           || Opc == ISD::ExternalSymbol
97           || Opc == ISD::TargetGlobalAddress
98           || Opc == ISD::TargetGlobalTLSAddress
99           || Opc == ISD::TargetJumpTable
100           || Opc == ISD::TargetConstantPool
101           || Opc == ISD::TargetExternalSymbol
102           || Opc == SPUISD::AFormAddr);
105 //! Predicate that returns true if the operand is an indirect target
// True when the address comes through a register or the result of a prior
// load (SPUISD::LDRESULT), i.e. it is not a directly materializable address.
106 bool isIndirectOperand(const SDValue &Op)
108   const unsigned Opc = Op.getOpcode();
109   return (Opc == ISD::Register
110           || Opc == SPUISD::LDRESULT);
// Constructor: configures all SPU-specific lowering state -- register
// classes, libcall names, per-type legality of each ISD operation
// (Legal/Promote/Custom/Expand), target DAG-combine opcodes, and the
// pre-RA scheduling preference.  NOTE(review): several original lines are
// elided from this view (initializer-list tail, loop increments, closing
// braces), so brace balance cannot be verified here.
114 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
115   : TargetLowering(TM),
118   // Fold away setcc operations if possible.
121   // Use _setjmp/_longjmp instead of setjmp/longjmp.
122   setUseUnderscoreSetJmp(true);
123   setUseUnderscoreLongJmp(true);
125   // Set up the SPU's register classes:
126   addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
127   addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
128   addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
129   addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
130   addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
131   addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
132   addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
134   // Initialize libcalls:
135   setLibcallName(RTLIB::MUL_I64, "__muldi3");
137   // SPU has no sign or zero extended loads for i1, i8, i16:
138   setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
139   setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
140   setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
142   setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
143   setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
144   setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
145   setTruncStoreAction(MVT::i8, MVT::i8, Custom);
146   setTruncStoreAction(MVT::i16, MVT::i8, Custom);
147   setTruncStoreAction(MVT::i32, MVT::i8, Custom);
148   setTruncStoreAction(MVT::i64, MVT::i8, Custom);
149   setTruncStoreAction(MVT::i128, MVT::i8, Custom);
151   setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
152   setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
153   setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
155   setLoadExtAction(ISD::EXTLOAD, MVT::f32, Custom);
157   // SPU constant load actions are custom lowered:
158   setOperationAction(ISD::Constant, MVT::i64, Custom);
159   setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
160   setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
162   // SPU's loads and stores have to be custom lowered:
163   for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
165     MVT VT = (MVT::SimpleValueType)sctype;
167     setOperationAction(ISD::LOAD, VT, Custom);
168     setOperationAction(ISD::STORE, VT, Custom);
171   // Custom lower BRCOND for i8 to "promote" the result to i16
172   setOperationAction(ISD::BRCOND, MVT::Other, Custom);
174   // Expand the jumptable branches
175   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
176   setOperationAction(ISD::BR_CC, MVT::Other, Expand);
178   // Custom lower SELECT_CC for most cases, but expand by default
179   setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
180   setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
181   setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
182   setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
184   setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
187   // SPU has no intrinsics for these particular operations:
188   setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
190   // SPU has no SREM/UREM instructions
191   setOperationAction(ISD::SREM, MVT::i32, Expand);
192   setOperationAction(ISD::UREM, MVT::i32, Expand);
193   setOperationAction(ISD::SREM, MVT::i64, Expand);
194   setOperationAction(ISD::UREM, MVT::i64, Expand);
196   // We don't support sin/cos/sqrt/fmod
197   setOperationAction(ISD::FSIN , MVT::f64, Expand);
198   setOperationAction(ISD::FCOS , MVT::f64, Expand);
199   setOperationAction(ISD::FREM , MVT::f64, Expand);
200   setOperationAction(ISD::FSIN , MVT::f32, Expand);
201   setOperationAction(ISD::FCOS , MVT::f32, Expand);
202   setOperationAction(ISD::FREM , MVT::f32, Expand);
204   // If we're enabling GP optimizations, use hardware square root
205   setOperationAction(ISD::FSQRT, MVT::f64, Expand);
206   setOperationAction(ISD::FSQRT, MVT::f32, Expand);
208   setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
209   setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
211   // SPU can do rotate right and left, so legalize it... but customize for i8
212   // because instructions don't exist.
214   // FIXME: Change from "expand" to appropriate type once ROTR is supported in
216   setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
217   setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
218   setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/);
220   setOperationAction(ISD::ROTL, MVT::i32, Legal);
221   setOperationAction(ISD::ROTL, MVT::i16, Legal);
222   setOperationAction(ISD::ROTL, MVT::i8, Custom);
224   // SPU has no native version of shift left/right for i8
225   setOperationAction(ISD::SHL, MVT::i8, Custom);
226   setOperationAction(ISD::SRL, MVT::i8, Custom);
227   setOperationAction(ISD::SRA, MVT::i8, Custom);
229   // SPU needs custom lowering for shift left/right for i64
230   setOperationAction(ISD::SHL, MVT::i64, Custom);
231   setOperationAction(ISD::SRL, MVT::i64, Custom);
232   setOperationAction(ISD::SRA, MVT::i64, Custom);
234   // Custom lower i8, i32 and i64 multiplications
235   setOperationAction(ISD::MUL, MVT::i8, Custom);
236   setOperationAction(ISD::MUL, MVT::i32, Custom);
237   setOperationAction(ISD::MUL, MVT::i64, Expand); // libcall
239   // SMUL_LOHI, UMUL_LOHI
240   setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom);
241   setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom);
242   setOperationAction(ISD::SMUL_LOHI, MVT::i64, Custom);
243   setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom);
245   // Need to custom handle (some) common i8, i64 math ops
246   setOperationAction(ISD::ADD, MVT::i64, Custom);
247   setOperationAction(ISD::SUB, MVT::i8, Custom);
248   setOperationAction(ISD::SUB, MVT::i64, Custom);
250   // SPU does not have BSWAP. It does have i32 support CTLZ.
251   // CTPOP has to be custom lowered.
252   setOperationAction(ISD::BSWAP, MVT::i32, Expand);
253   setOperationAction(ISD::BSWAP, MVT::i64, Expand);
255   setOperationAction(ISD::CTPOP, MVT::i8, Custom);
256   setOperationAction(ISD::CTPOP, MVT::i16, Custom);
257   setOperationAction(ISD::CTPOP, MVT::i32, Custom);
258   setOperationAction(ISD::CTPOP, MVT::i64, Custom);
260   setOperationAction(ISD::CTTZ , MVT::i32, Expand);
261   setOperationAction(ISD::CTTZ , MVT::i64, Expand);
263   setOperationAction(ISD::CTLZ , MVT::i32, Legal);
265   // SPU has a version of select that implements (a&~c)|(b&c), just like
266   // select ought to work:
267   setOperationAction(ISD::SELECT, MVT::i8, Legal);
268   setOperationAction(ISD::SELECT, MVT::i16, Legal);
269   setOperationAction(ISD::SELECT, MVT::i32, Legal);
270   setOperationAction(ISD::SELECT, MVT::i64, Expand);
272   setOperationAction(ISD::SETCC, MVT::i8, Legal);
273   setOperationAction(ISD::SETCC, MVT::i16, Legal);
274   setOperationAction(ISD::SETCC, MVT::i32, Legal);
275   setOperationAction(ISD::SETCC, MVT::i64, Expand);
277   // Zero extension and sign extension for i64 have to be
279   setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
280   setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
281   setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);
283   // Custom lower truncates
284   setOperationAction(ISD::TRUNCATE, MVT::i8, Custom);
285   setOperationAction(ISD::TRUNCATE, MVT::i16, Custom);
286   setOperationAction(ISD::TRUNCATE, MVT::i32, Custom);
287   setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
289   // SPU has a legal FP -> signed INT instruction
290   setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
291   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
292   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
293   setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
295   // FDIV on SPU requires custom lowering
296   setOperationAction(ISD::FDIV, MVT::f32, Custom);
297   //setOperationAction(ISD::FDIV, MVT::f64, Custom);
299   // SPU has [U|S]INT_TO_FP
300   setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
301   setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
302   setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
303   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
304   setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
305   setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
306   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
307   setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
  // BIT_CONVERT between same-sized int/fp types is a no-op on SPU registers.
309   setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
310   setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
311   setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
312   setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
314   // We cannot sextinreg(i1). Expand to shifts.
315   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
317   // Support label based line numbers.
318   setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
319   setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
321   // We want to legalize GlobalAddress and ConstantPool nodes into the
322   // appropriate instructions to materialize the address.
323   for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
325     MVT VT = (MVT::SimpleValueType)sctype;
327     setOperationAction(ISD::GlobalAddress, VT, Custom);
328     setOperationAction(ISD::ConstantPool, VT, Custom);
329     setOperationAction(ISD::JumpTable, VT, Custom);
332   // RET must be custom lowered, to meet ABI requirements
333   setOperationAction(ISD::RET, MVT::Other, Custom);
335   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
336   setOperationAction(ISD::VASTART , MVT::Other, Custom);
338   // Use the default implementation.
339   setOperationAction(ISD::VAARG , MVT::Other, Expand);
340   setOperationAction(ISD::VACOPY , MVT::Other, Expand);
341   setOperationAction(ISD::VAEND , MVT::Other, Expand);
342   setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
343   setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
344   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
345   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
347   // Cell SPU has instructions for converting between i64 and fp.
348   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
349   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
351   // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
352   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
354   // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
355   setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
357   // First set operation action for all vector types to expand. Then we
358   // will selectively turn on ones that can be effectively codegen'd.
359   addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
360   addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
361   addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
362   addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
363   addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
364   addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
366   for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
367        i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
368     MVT VT = (MVT::SimpleValueType)i;
370     // add/sub are legal for all supported vector VT's.
371     setOperationAction(ISD::ADD , VT, Legal);
372     setOperationAction(ISD::SUB , VT, Legal);
373     // mul has to be custom lowered.
374     setOperationAction(ISD::MUL , VT, Custom);
376     setOperationAction(ISD::AND , VT, Legal);
377     setOperationAction(ISD::OR , VT, Legal);
378     setOperationAction(ISD::XOR , VT, Legal);
379     setOperationAction(ISD::LOAD , VT, Legal);
380     setOperationAction(ISD::SELECT, VT, Legal);
381     setOperationAction(ISD::STORE, VT, Legal);
383     // These operations need to be expanded:
384     setOperationAction(ISD::SDIV, VT, Expand);
385     setOperationAction(ISD::SREM, VT, Expand);
386     setOperationAction(ISD::UDIV, VT, Expand);
387     setOperationAction(ISD::UREM, VT, Expand);
388     setOperationAction(ISD::FDIV, VT, Custom);
390     // Custom lower build_vector, constant pool spills, insert and
391     // extract vector elements:
392     setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
393     setOperationAction(ISD::ConstantPool, VT, Custom);
394     setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
395     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
396     setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
397     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
  // v16i8 overrides of the generic vector defaults set in the loop above:
400   setOperationAction(ISD::MUL, MVT::v16i8, Custom);
401   setOperationAction(ISD::AND, MVT::v16i8, Custom);
402   setOperationAction(ISD::OR, MVT::v16i8, Custom);
403   setOperationAction(ISD::XOR, MVT::v16i8, Custom);
404   setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
406   setShiftAmountType(MVT::i32);
407   setBooleanContents(ZeroOrOneBooleanContent);
409   setStackPointerRegisterToSaveRestore(SPU::R1);
411   // We have target-specific dag combine patterns for the following nodes:
412   setTargetDAGCombine(ISD::ADD);
413   setTargetDAGCombine(ISD::ZERO_EXTEND);
414   setTargetDAGCombine(ISD::SIGN_EXTEND);
415   setTargetDAGCombine(ISD::ANY_EXTEND);
417   computeRegisterProperties();
419   // Set pre-RA register scheduler default to BURR, which produces slightly
420   // better code than the default (could also be TDRR, but TargetLowering.h
421   // needs a mod to support that model):
422   setSchedulingPreference(SchedulingForRegPressure);
// Return a printable name for a SPUISD target-specific opcode, or 0 when the
// opcode is not recognized.  The node_names table is built lazily on the
// first call; subsequent calls are simple map lookups.
426 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
428   if (node_names.empty()) {
429     node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
430     node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
431     node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
432     node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
433     node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
434     node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
435     node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
436     node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
437     node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
438     node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
439     node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
440     node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
441     node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
442     node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
443     node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
444     node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
445     node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
446     node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
447     node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
448     node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
449     node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
450     node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
451     node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
452     node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
453     node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
454       "SPUISD::ROTQUAD_RZ_BYTES";
455     node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
456       "SPUISD::ROTQUAD_RZ_BITS";
457     node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
458     node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
459       "SPUISD::ROTBYTES_LEFT_BITS";
460     node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
461     node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
462     node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
463     node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
464     node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
465     node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
466     node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
467     node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
468     node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
  // Lookup; 0 is returned for opcodes not present in the table.
471   std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
473   return ((i != node_names.end()) ? i->second : 0);
// SETCC on SPU produces a result of the same integer type as its operands;
// for non-integer (FP) comparisons the result type is i32.
476 MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const {
477   MVT VT = Op.getValueType();
478   return (VT.isInteger() ? VT : MVT(MVT::i32));
481 //===----------------------------------------------------------------------===//
482 // Calling convention code:
483 //===----------------------------------------------------------------------===//
485 #include "SPUGenCallingConv.inc"
487 //===----------------------------------------------------------------------===//
488 // LowerOperation implementation
489 //===----------------------------------------------------------------------===//
491 /// Aligned load common code for CellSPU
493   \param[in] Op The SelectionDAG load or store operand
494   \param[in] DAG The selection DAG
495   \param[in] ST CellSPU subtarget information structure
496   \param[in,out] alignment Caller initializes this to the load or store node's
497   value from getAlignment(), may be updated while generating the aligned load
498   \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
499   offset (divisible by 16, modulo 16 == 0)
500   \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
501   offset of the preferred slot (modulo 16 != 0)
502   \param[in,out] VT Caller initializes this value type to the load or store
503   node's loaded or stored value type; may be updated if an i1-extended load or
505   \param[out] was16aligned true if the base pointer had 16-byte alignment,
506   otherwise false. Can help to determine if the chunk needs to be rotated.
508   Both load and store lowering load a block of data aligned on a 16-byte
509   boundary. This is the common aligned load code shared between both.
// NOTE(review): the parameter declaring LSN (the LSBaseSDNode* used below)
// is on a line elided from this view -- confirm against the full file.
512 AlignedLoad(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST,
514             unsigned &alignment, int &alignOffs, int &prefSlotOffs,
515             MVT &VT, bool &was16aligned)
517   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
518   const valtype_map_s *vtm = getValueTypeMapEntry(VT);
519   SDValue basePtr = LSN->getBasePtr();
520   SDValue chain = LSN->getChain();
  // Case 1: base + constant offset.  Peel off the constant so the load can be
  // done from the 16-byte-aligned base, recording the rotation needed to land
  // the value in its preferred slot.
522   if (basePtr.getOpcode() == ISD::ADD) {
523     SDValue Op1 = basePtr.getNode()->getOperand(1);
525     if (Op1.getOpcode() == ISD::Constant
526         || Op1.getOpcode() == ISD::TargetConstant) {
527       const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));
529       alignOffs = (int) CN->getZExtValue();
530       prefSlotOffs = (int) (alignOffs & 0xf);
532       // Adjust the rotation amount to ensure that the final result ends up in
533       // the preferred slot:
534       prefSlotOffs -= vtm->prefslot_byte;
535       basePtr = basePtr.getOperand(0);
537       // Loading from memory, can we adjust alignment?
538       if (basePtr.getOpcode() == SPUISD::AFormAddr) {
539         SDValue APtr = basePtr.getOperand(0);
540         if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
541           GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
542           alignment = GSDN->getGlobal()->getAlignment();
547       prefSlotOffs = -vtm->prefslot_byte;
  // Case 2: frame index -- the offset is the slot's position in the stack
  // frame, which is always stack-slot aligned.
549   } else if (basePtr.getOpcode() == ISD::FrameIndex) {
550     FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(basePtr);
551     alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
552     prefSlotOffs = (int) (alignOffs & 0xf);
553     prefSlotOffs -= vtm->prefslot_byte;
556     prefSlotOffs = -vtm->prefslot_byte;
  // 16-byte-aligned case: emit a D-form addressed v16i8 load of the chunk.
559   if (alignment == 16) {
560     // Realign the base pointer as a D-Form address:
561     if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
562       basePtr = DAG.getNode(ISD::ADD, PtrVT,
564                             DAG.getConstant((alignOffs & ~0xf), PtrVT));
567     // Emit the vector load:
569     return DAG.getLoad(MVT::v16i8, chain, basePtr,
570                        LSN->getSrcValue(), LSN->getSrcValueOffset(),
571                        LSN->isVolatile(), 16);
574   // Unaligned load or we're using the "large memory" model, which means that
575   // we have to be very pessimistic:
576   if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
577     basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr,
578                           DAG.getConstant(0, PtrVT));
  // Add the offset (rounded down to the 16-byte chunk) and load the chunk;
  // the caller must rotate since the pointer was not known to be 16-aligned.
582   basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
583                         DAG.getConstant((alignOffs & ~0xf), PtrVT));
584   was16aligned = false;
585   return DAG.getLoad(MVT::v16i8, chain, basePtr,
586                      LSN->getSrcValue(), LSN->getSrcValueOffset(),
587                      LSN->isVolatile(), 16);
590 /// Custom lower loads for CellSPU
592   All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
593   within a 16-byte block, we have to rotate to extract the requested element.
595   For extending loads, we also want to ensure that the following sequence is
596   emitted, e.g. for MVT::f32 extending load to MVT::f64:
600   %2  v16i8,ch = rotate %1
601   %3  v4f8, ch = bitconvert %2
602   %4  f32 = vec2prefslot %3
603   %5  f64 = fp_extend %4
607 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
608   LoadSDNode *LN = cast<LoadSDNode>(Op);
609   SDValue the_chain = LN->getChain();
610   MVT InVT = LN->getMemoryVT();
611   MVT OutVT = Op.getValueType();
612   ISD::LoadExtType ExtType = LN->getExtensionType();
613   unsigned alignment = LN->getAlignment();
616   switch (LN->getAddressingMode()) {
617   case ISD::UNINDEXED: {
  // Load the whole 16-byte chunk containing the value; AlignedLoad fills in
  // the rotation amount (rotamt) needed to reach the preferred slot.
621       AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, InVT,
624     if (result.getNode() == 0)
627     the_chain = result.getValue(1);
628     // Rotate the chunk if necessary
  // Two rotation strategies: a constant i16 rotate amount when the chunk was
  // 16-byte aligned, otherwise an address-based rotate (base + rotamt).
631     if (rotamt != 0 || !was16aligned) {
632       SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
636         Ops[1] = DAG.getConstant(rotamt, MVT::i16);
638         MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
639         LoadSDNode *LN1 = cast<LoadSDNode>(result);
640         Ops[1] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
641                              DAG.getConstant(rotamt, PtrVT));
644       result = DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v16i8, Ops, 2);
647     // Convert the loaded v16i8 vector to the appropriate vector type
648     // specified by the operand:
649     MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
650     result = DAG.getNode(SPUISD::VEC2PREFSLOT, InVT,
651                          DAG.getNode(ISD::BIT_CONVERT, vecVT, result));
653     // Handle extending loads by extending the scalar result:
654     if (ExtType == ISD::SEXTLOAD) {
655       result = DAG.getNode(ISD::SIGN_EXTEND, OutVT, result);
656     } else if (ExtType == ISD::ZEXTLOAD) {
657       result = DAG.getNode(ISD::ZERO_EXTEND, OutVT, result);
658     } else if (ExtType == ISD::EXTLOAD) {
659       unsigned NewOpc = ISD::ANY_EXTEND;
661       if (OutVT.isFloatingPoint())
662         NewOpc = ISD::FP_EXTEND;
664       result = DAG.getNode(NewOpc, OutVT, result);
  // Wrap in LDRESULT so the (value, chain) pair survives legalization.
667     SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
668     SDValue retops[2] = {
673     result = DAG.getNode(SPUISD::LDRESULT, retvts,
674                          retops, sizeof(retops) / sizeof(retops[0]));
  // Indexed addressing modes are not supported; diagnose and fall through
  // (elided lines between the cases are not visible in this view).
681   case ISD::LAST_INDEXED_MODE:
682     cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
684     cerr << (unsigned) LN->getAddressingMode() << "\n";
692 /// Custom lower stores for CellSPU
694   All CellSPU stores are aligned to 16-byte boundaries, so for elements
695   within a 16-byte block, we have to generate a shuffle to insert the
696   requested element into its place, then store the resulting block.
699 LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
700   StoreSDNode *SN = cast<StoreSDNode>(Op);
701   SDValue Value = SN->getValue();
702   MVT VT = Value.getValueType();
703   MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
704   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
705   unsigned alignment = SN->getAlignment();
707   switch (SN->getAddressingMode()) {
708   case ISD::UNINDEXED: {
709     int chunk_offset, slot_offset;
712     // The vector type we really want to load from the 16-byte chunk.
713     MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
714         stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
  // Read-modify-write: load the containing 16-byte chunk, splice the scalar
  // in via a shuffle, then store the whole chunk back.
716     SDValue alignLoadVec =
717       AlignedLoad(Op, DAG, ST, SN, alignment,
718                   chunk_offset, slot_offset, VT, was16aligned);
720     if (alignLoadVec.getNode() == 0)
723     LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
724     SDValue basePtr = LN->getBasePtr();
725     SDValue the_chain = alignLoadVec.getValue(1);
726     SDValue theValue = SN->getValue();
  // NOTE(review): the first operand of this && (and the surrounding if) is on
  // an elided line -- confirm against the full file.
730         && (theValue.getOpcode() == ISD::AssertZext
731             || theValue.getOpcode() == ISD::AssertSext)) {
732       // Drill down and get the value for zero- and sign-extended
734       theValue = theValue.getOperand(0);
739     SDValue insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
740     SDValue insertEltPtr;
742     // If the base pointer is already a D-form address, then just create
743     // a new D-form address with a slot offset and the original base pointer.
744     // Otherwise generate a D-form address with the slot offset relative
745     // to the stack pointer, which is always aligned.
746     DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
747     DEBUG(basePtr.getNode()->dump(&DAG));
750     if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
751         (basePtr.getOpcode() == ISD::ADD
752          && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
753       insertEltPtr = basePtr;
755       insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
  // Build the insertion shuffle: SHUFFLE_MASK selects where the scalar lands,
  // SHUFB merges the scalarized value into the loaded chunk.
758     SDValue insertEltOp =
759       DAG.getNode(SPUISD::SHUFFLE_MASK, vecVT, insertEltPtr);
760     SDValue vectorizeOp =
761       DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue);
763     result = DAG.getNode(SPUISD::SHUFB, vecVT,
764                          vectorizeOp, alignLoadVec,
765                          DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, insertEltOp));
767     result = DAG.getStore(the_chain, result, basePtr,
768                           LN->getSrcValue(), LN->getSrcValueOffset(),
769                           LN->isVolatile(), LN->getAlignment());
  // Disabled debug dump.  NOTE(review): '¤tRoot' below looks like
  // HTML-entity mojibake for '&currentRoot' (cf. the use of currentRoot a few
  // lines down) -- confirm against upstream before re-enabling this block.
771 #if 0 && !defined(NDEBUG)
772     if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
773       const SDValue ¤tRoot = DAG.getRoot();
776       cerr << "------- CellSPU:LowerStore result:\n";
779       DAG.setRoot(currentRoot);
  // NOTE(review): this diagnostic says "LowerLOAD"/"LoadSDNode" but we are in
  // LowerSTORE handling a StoreSDNode -- looks copy-pasted; confirm and fix
  // the message text upstream.
790   case ISD::LAST_INDEXED_MODE:
791     cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
793     cerr << (unsigned) SN->getAddressingMode() << "\n";
801 /// Generate the address of a constant pool entry.
// Static relocation model only: small-memory targets use an A-form absolute
// address; large-memory targets materialize Hi/Lo halves combined through an
// IndirectAddr node.  The non-static fallback (assert/abort) is on lines
// elided from this view.
803 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
804   MVT PtrVT = Op.getValueType();
805   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
806   Constant *C = CP->getConstVal();
807   SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
808   SDValue Zero = DAG.getConstant(0, PtrVT);
809   const TargetMachine &TM = DAG.getTarget();
811   if (TM.getRelocationModel() == Reloc::Static) {
812     if (!ST->usingLargeMem()) {
813       // Just return the SDValue with the constant pool address in it.
814       return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
816       SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
817       SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
818       return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
823          "LowerConstantPool: Relocation model other than static"
// Generate the address of a jump table entry.  Mirrors LowerConstantPool:
// A-form address for small-memory static code, Hi/Lo + IndirectAddr for
// large-memory; non-static models are rejected (assert on elided lines).
829 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
830   MVT PtrVT = Op.getValueType();
831   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
832   SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
833   SDValue Zero = DAG.getConstant(0, PtrVT);
834   const TargetMachine &TM = DAG.getTarget();
836   if (TM.getRelocationModel() == Reloc::Static) {
837     if (!ST->usingLargeMem()) {
838       return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
840       SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
841       SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
842       return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
847          "LowerJumpTable: Relocation model other than static not supported.");
// Generate the address of a global.  Same pattern as LowerConstantPool /
// LowerJumpTable: A-form for small-memory static code, Hi/Lo + IndirectAddr
// for large-memory; other relocation models produce the error below.
852 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
853   MVT PtrVT = Op.getValueType();
854   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
855   GlobalValue *GV = GSDN->getGlobal();
856   SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
857   const TargetMachine &TM = DAG.getTarget();
858   SDValue Zero = DAG.getConstant(0, PtrVT);
860   if (TM.getRelocationModel() == Reloc::Static) {
861     if (!ST->usingLargeMem()) {
862       return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
864       SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
865       SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
866       return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
869     cerr << "LowerGlobalAddress: Relocation model other than static not "
878 //! Custom lower i64 integer constants
880   This code inserts all of the necessary juggling that needs to occur to load
881   a 64-bit constant into a register.
// Splats the constant into both lanes of a v2i64 BUILD_VECTOR, then extracts
// the preferred slot.  Non-i64 types fall through to the error path below
// (its abort/return is on lines elided from this view).
884 LowerConstant(SDValue Op, SelectionDAG &DAG) {
885   MVT VT = Op.getValueType();
887   if (VT == MVT::i64) {
888     ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
889     SDValue T = DAG.getConstant(CN->getZExtValue(), VT);
890     return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
891                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
893     cerr << "LowerConstant: unhandled constant type "
903 //! Custom lower double precision floating point constants
// Reinterprets the f64 bit pattern as an i64, splats it into a v2i64 vector,
// bitcasts to v2f64, and extracts the preferred slot.
905 LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
906   MVT VT = Op.getValueType();
908   if (VT == MVT::f64) {
909     ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
912            "LowerConstantFP: Node is not ConstantFPSDNode");
914     uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
915     SDValue T = DAG.getConstant(dbits, MVT::i64);
916     SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T);
917     return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
918                        DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, Tvec));
924 //! Lower MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
// An i8 condition is widened via ZERO_EXTEND before the branch, since SPU
// compares operate on wider types.  NOTE(review): CondNVT (the promoted
// type, presumably MVT::i16 per the comment above) is declared on a line
// elided from this view -- confirm against the full file.
926 LowerBRCOND(SDValue Op, SelectionDAG &DAG)
928   SDValue Cond = Op.getOperand(1);
929   MVT CondVT = Cond.getValueType();
932   if (CondVT == MVT::i8) {
934     return DAG.getNode(ISD::BRCOND, Op.getValueType(),
936                        DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
939   return SDValue();                // Unchanged
// Lower incoming formal arguments: the first NumArgRegs arguments arrive in
// the SPU argument registers (copied into fresh virtual registers); any
// overflow arguments are loaded from fixed stack slots. For varargs
// functions, the remaining argument registers are spilled to the stack so
// va_arg can walk them. VarArgsFrameIndex receives the frame index of the
// first vararg slot.
943 LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
945   MachineFunction &MF = DAG.getMachineFunction();
946   MachineFrameInfo *MFI = MF.getFrameInfo();
947   MachineRegisterInfo &RegInfo = MF.getRegInfo();
948   SmallVector<SDValue, 48> ArgValues;
949   SDValue Root = Op.getOperand(0);
950   bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
952   const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
953   const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
// Stack-passed arguments begin just above the minimal frame (linkage area).
955   unsigned ArgOffset = SPUFrameInfo::minStackSize();
956   unsigned ArgRegIdx = 0;
957   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
959   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
961   // Add DAG nodes to load the arguments or copy them out of registers.
// The last node value is the chain, hence getNumValues() - 1 arguments.
962   for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
963        ArgNo != e; ++ArgNo) {
964     MVT ObjectVT = Op.getValue(ArgNo).getValueType();
965     unsigned ObjSize = ObjectVT.getSizeInBits()/8;
968     if (ArgRegIdx < NumArgRegs) {
// Select the register class matching the argument's value type.
969       const TargetRegisterClass *ArgRegClass;
971       switch (ObjectVT.getSimpleVT()) {
973         cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
974              << ObjectVT.getMVTString()
979         ArgRegClass = &SPU::R8CRegClass;
982         ArgRegClass = &SPU::R16CRegClass;
985         ArgRegClass = &SPU::R32CRegClass;
988         ArgRegClass = &SPU::R64CRegClass;
991         ArgRegClass = &SPU::R32FPRegClass;
994         ArgRegClass = &SPU::R64FPRegClass;
1002         ArgRegClass = &SPU::VECREGRegClass;
// Mark the physical argument register live-in and copy it to a vreg.
1006       unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
1007       RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1008       ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
1011       // We need to load the argument to a virtual register if we determined
1012       // above that we ran out of physical registers of the appropriate type
1013       // or we're forced to do vararg
1014       int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1015       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1016       ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
1017       ArgOffset += StackSlotSize;
1020     ArgValues.push_back(ArgVal);
// Chain the next argument after this one (operand 0 is the chain).
1022     Root = ArgVal.getOperand(0);
1027     // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
1028     // We will spill (79-3)+1 registers to the stack
1029     SmallVector<SDValue, 79-3+1> MemOps;
1031     // Create the frame slot
// Spill every remaining argument register as a full v16i8 quadword so the
// vararg area holds the registers' complete contents.
1033     for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1034       VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
1035       SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1036       SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
1037       SDValue Store = DAG.getStore(Root, ArgVal, FIN, NULL, 0);
1038       Root = Store.getOperand(0);
1039       MemOps.push_back(Store);
1041       // Increment address by stack slot size for the next stored argument
1042       ArgOffset += StackSlotSize;
// Tie all the spill stores together into a single chain.
1044     if (!MemOps.empty())
1045       Root = DAG.getNode(ISD::TokenFactor,MVT::Other,&MemOps[0],MemOps.size());
1048   ArgValues.push_back(Root);
1050   // Return the new list of results.
1051   return DAG.getNode(ISD::MERGE_VALUES, Op.getNode()->getVTList(),
1052                      &ArgValues[0], ArgValues.size());
1055 /// isLSAAddress - Return the immediate to use if the specified
1056 /// value is representable as a LSA address.
// Returns the word-address constant (byte address >> 2) when Op is a
// constant whose low 2 bits are clear and which fits in a sign-extended
// 18-bit field; otherwise returns null.
1057 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1058   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1061   int Addr = C->getZExtValue();
1062   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
1063       (Addr << 14 >> 14) != Addr)
1064     return 0;  // Top 14 bits have to be sext of immediate.
1066   return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
// Lower an outgoing call: marshal arguments into the SPU argument registers
// (overflow goes to stack slots above the linkage area), rewrite the callee
// into the addressing form appropriate for the memory model (PC-relative
// BRSL, absolute BRASL, or indirect for large-memory mode), emit the
// CALLSEQ_START/SPUISD::CALL/CALLSEQ_END sequence, and finally copy any
// return values out of R3 (and R4 for two-register results).
1071 LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1072   CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
1073   SDValue Chain = TheCall->getChain();
1074   SDValue Callee = TheCall->getCallee();
1075   unsigned NumOps = TheCall->getNumArgs();
1076   unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1077   const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1078   const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1080   // Handy pointer type
1081   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1083   // Accumulate how many bytes are to be pushed on the stack, including the
1084   // linkage area, and parameter passing area. According to the SPU ABI,
1085   // we minimally need space for [LR] and [SP]
1086   unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1088   // Set up a copy of the stack pointer for use loading and storing any
1089   // arguments that may not fit in the registers available for argument
1091   SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1093   // Figure out which arguments are going to go in registers, and which in
1095   unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1096   unsigned ArgRegIdx = 0;
1098   // Keep track of registers passing arguments
1099   std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1100   // And the arguments passed on the stack
1101   SmallVector<SDValue, 8> MemOpChains;
1103   for (unsigned i = 0; i != NumOps; ++i) {
1104     SDValue Arg = TheCall->getArg(i);
1106     // PtrOff will be used to store the current argument to the stack if a
1107     // register cannot be found for it.
1108     SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1109     PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1111     switch (Arg.getValueType().getSimpleVT()) {
1112     default: assert(0 && "Unexpected ValueType for argument!");
// Each case group below: use the next argument register if one is left,
// otherwise store the argument to its stack slot.
1116       if (ArgRegIdx != NumArgRegs) {
1117         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1119         MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1120         ArgOffset += StackSlotSize;
1125       if (ArgRegIdx != NumArgRegs) {
1126         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1128         MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1129         ArgOffset += StackSlotSize;
1138       if (ArgRegIdx != NumArgRegs) {
1139         RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1141         MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1142         ArgOffset += StackSlotSize;
1148   // Update number of stack bytes actually used, insert a call sequence start
1149   NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1150   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
1153   if (!MemOpChains.empty()) {
1154     // Adjust the stack pointer for the stack arguments.
1155     Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1156                         &MemOpChains[0], MemOpChains.size());
1159   // Build a sequence of copy-to-reg nodes chained together with token chain
1160   // and flag operands which copy the outgoing args into the appropriate regs.
1162   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1163     Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1165     InFlag = Chain.getValue(1);
1168   SmallVector<SDValue, 8> Ops;
1169   unsigned CallOpc = SPUISD::CALL;
1171   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1172   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1173   // node so that legalize doesn't hack it.
1174   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1175     GlobalValue *GV = G->getGlobal();
1176     MVT CalleeVT = Callee.getValueType();
1177     SDValue Zero = DAG.getConstant(0, PtrVT);
1178     SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1180     if (!ST->usingLargeMem()) {
1181       // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1182       // style calls, otherwise, external symbols are BRASL calls. This assumes
1183       // that declared/defined symbols are in the same compilation unit and can
1184       // be reached through PC-relative jumps.
1187       // This may be an unsafe assumption for JIT and really large compilation
1189       if (GV->isDeclaration()) {
1190         Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1192         Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1195       // "Large memory" mode: Turn all calls into indirect calls with a X-form
1197       Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
1199   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1200     Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1201   else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1202     // If this is an absolute destination address that appears to be a legal
1203     // local store address, use the munged value.
1204     Callee = SDValue(Dest, 0);
1207   Ops.push_back(Chain);
1208   Ops.push_back(Callee);
1210   // Add argument registers to the end of the list so that they are known live
1212   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1213     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1214                                   RegsToPass[i].second.getValueType()));
1216   if (InFlag.getNode())
1217     Ops.push_back(InFlag);
1218   // Returns a chain and a flag for retval copy to use.
1219   Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag),
1220                       &Ops[0], Ops.size());
1221   InFlag = Chain.getValue(1);
1223   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
1224                              DAG.getIntPtrConstant(0, true), InFlag);
1225   if (TheCall->getValueType(0) != MVT::Other)
1226     InFlag = Chain.getValue(1);
1228   SDValue ResultVals[3];
1229   unsigned NumResults = 0;
1231   // If the call has results, copy the values out of the ret val registers.
1232   switch (TheCall->getValueType(0).getSimpleVT()) {
1233   default: assert(0 && "Unexpected ret value!");
1234   case MVT::Other: break;
// Two-value i32 results come back in R4/R3; single results in R3.
1236     if (TheCall->getValueType(1) == MVT::i32) {
1237       Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1238       ResultVals[0] = Chain.getValue(0);
1239       Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1240                                  Chain.getValue(2)).getValue(1);
1241       ResultVals[1] = Chain.getValue(0);
1244       Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1245       ResultVals[0] = Chain.getValue(0);
1250     Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1251     ResultVals[0] = Chain.getValue(0);
1256     Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1257                                InFlag).getValue(1);
1258     ResultVals[0] = Chain.getValue(0);
1267     Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1268                                InFlag).getValue(1);
1269     ResultVals[0] = Chain.getValue(0);
1274   // If the function returns void, just return the chain.
1275   if (NumResults == 0)
1278   // Otherwise, merge everything together with a MERGE_VALUES node.
1279   ResultVals[NumResults++] = Chain;
1280   SDValue Res = DAG.getMergeValues(ResultVals, NumResults);
1281   return Res.getValue(Op.getResNo());
// Lower a return: run the SPU return calling convention over the return
// operands, mark the return registers live-out, copy each value into its
// assigned register (glued together with Flag), and emit SPUISD::RET_FLAG.
1285 LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
1286   SmallVector<CCValAssign, 16> RVLocs;
1287   unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1288   bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1289   CCState CCInfo(CC, isVarArg, TM, RVLocs);
1290   CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);
1292   // If this is the first return lowered for this function, add the regs to the
1293   // liveout set for the function.
1294   if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1295     for (unsigned i = 0; i != RVLocs.size(); ++i)
1296       DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1299   SDValue Chain = Op.getOperand(0);
1302   // Copy the result values into the output registers.
// Return values are the odd operands of the RET node (i*2+1).
1303   for (unsigned i = 0; i != RVLocs.size(); ++i) {
1304     CCValAssign &VA = RVLocs[i];
1305     assert(VA.isRegLoc() && "Can only return in registers!");
1306     Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1307     Flag = Chain.getValue(1);
// With results, the flag is threaded into RET_FLAG; otherwise just the chain.
1311     return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1313     return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1317 //===----------------------------------------------------------------------===//
1318 // Vector related lowering:
1319 //===----------------------------------------------------------------------===//
// getVecImm - If the BUILD_VECTOR node N is a splat of one constant value
// (ignoring undef elements), return that element's ConstantSDNode; returns
// null for non-constant or mixed-value vectors, and for all-undef vectors.
1321 static ConstantSDNode *
1322 getVecImm(SDNode *N) {
1323   SDValue OpVal(0, 0);
1325   // Check to see if this buildvec has a single non-undef value in its elements.
1326   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1327     if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1328     if (OpVal.getNode() == 0)
1329       OpVal = N->getOperand(i);
1330     else if (OpVal != N->getOperand(i))
1334   if (OpVal.getNode() != 0) {
1335     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1340   return 0; // All UNDEF: use implicit def.; not Constant node
1343 /// get_vec_i18imm - Test if this vector is a vector filled with the same value
1344 /// and the value fits into an unsigned 18-bit constant, and if so, return the
1346 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1348   if (ConstantSDNode *CN = getVecImm(N)) {
1349     uint64_t Value = CN->getZExtValue();
// For i64 splats, both 32-bit halves must match (upper/lower compared in the
// elided check below); the value is then reduced to one 32-bit half.
1350     if (ValueType == MVT::i64) {
1351       uint64_t UValue = CN->getZExtValue();
1352       uint32_t upper = uint32_t(UValue >> 32);
1353       uint32_t lower = uint32_t(UValue);
1356       Value = Value >> 32;
// 0x3ffff is the maximum unsigned 18-bit immediate.
1358     if (Value <= 0x3ffff)
1359       return DAG.getTargetConstant(Value, ValueType);
1365 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1366 /// and the value fits into a signed 16-bit constant, and if so, return the
1368 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1370   if (ConstantSDNode *CN = getVecImm(N)) {
1371     int64_t Value = CN->getSExtValue();
// For i64 splats, both 32-bit halves must match before folding to one half.
1372     if (ValueType == MVT::i64) {
1373       uint64_t UValue = CN->getZExtValue();
1374       uint32_t upper = uint32_t(UValue >> 32);
1375       uint32_t lower = uint32_t(UValue);
1378       Value = Value >> 32;
// Accept the signed 16-bit range [-32768, 32767].
1380     if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1381       return DAG.getTargetConstant(Value, ValueType);
1388 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1389 /// and the value fits into a signed 10-bit constant, and if so, return the
1391 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1393   if (ConstantSDNode *CN = getVecImm(N)) {
1394     int64_t Value = CN->getSExtValue();
// For i64 splats, both 32-bit halves must match before folding to one half.
1395     if (ValueType == MVT::i64) {
1396       uint64_t UValue = CN->getZExtValue();
1397       uint32_t upper = uint32_t(UValue >> 32);
1398       uint32_t lower = uint32_t(UValue);
1401       Value = Value >> 32;
// isS10Constant checks the signed 10-bit immediate range.
1403     if (isS10Constant(Value))
1404       return DAG.getTargetConstant(Value, ValueType);
1410 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1411 /// and the value fits into a signed 8-bit constant, and if so, return the
1414 /// @note: The incoming vector is v16i8 because that's the only way we can load
1415 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
1417 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1419   if (ConstantSDNode *CN = getVecImm(N)) {
1420     int Value = (int) CN->getZExtValue();
// For i16 splats, the immediate qualifies when its two bytes are equal;
// return the single repeated byte.
1421     if (ValueType == MVT::i16
1422         && Value <= 0xffff /* truncated from uint64_t */
1423         && ((short) Value >> 8) == ((short) Value & 0xff))
1424       return DAG.getTargetConstant(Value & 0xff, ValueType);
1425     else if (ValueType == MVT::i8
1426              && (Value & 0xff) == Value)
1427       return DAG.getTargetConstant(Value, ValueType);
1433 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1434 /// and the value fits into a signed 16-bit constant, and if so, return the
// Matches splat constants whose low 16 bits are zero (ILHU loads the upper
// halfword); the returned immediate is the value shifted down by 16.
1436 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1438   if (ConstantSDNode *CN = getVecImm(N)) {
1439     uint64_t Value = CN->getZExtValue();
1440     if ((ValueType == MVT::i32
1441          && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1442         || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1443       return DAG.getTargetConstant(Value >> 16, ValueType);
1449 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
// Returns the splat value of N as a 32-bit target constant, or nothing if N
// is not a constant splat (getVecImm returns null).
1450 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1451   if (ConstantSDNode *CN = getVecImm(N)) {
1452     return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
1458 /// get_v2i64_imm - Catch-all for general 64-bit constant vectors
// Returns the splat value of N as a 64-bit target constant, or nothing if N
// is not a constant splat (getVecImm returns null).
1459 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1460   if (ConstantSDNode *CN = getVecImm(N)) {
// Keep the full 64-bit value: the previous (unsigned) cast silently
// truncated the splat constant to its low 32 bits before widening it back
// to MVT::i64, corrupting any immediate with bits set above bit 31.
1461     return DAG.getTargetConstant(CN->getZExtValue(), MVT::i64);
1467 // If this is a vector of constants or undefs, get the bits. A bit in
1468 // UndefBits is set if the corresponding element of the vector is an
1469 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1470 // zero. Return true if this is not an array of constants, false if it is.
1472 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1473                                        uint64_t UndefBits[2]) {
1474   // Start with zero'd results.
1475   VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1477   unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
1478   for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1479     SDValue OpVal = BV->getOperand(i);
// Map element i to a half (PartNo) and a slot within that 64-bit half.
1481     unsigned PartNo = i >= e/2;     // In the upper 128 bits?
1482     unsigned SlotNo = e/2 - (i & (e/2-1))-1;  // Which subpiece of the uint64_t.
1484     uint64_t EltBits = 0;
1485     if (OpVal.getOpcode() == ISD::UNDEF) {
// Record an all-ones mask for undef elements; their value bits stay zero.
1486       uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1487       UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1489     } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1490       EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
1491     } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
// FP elements contribute their raw IEEE-754 bit patterns.
1492       const APFloat &apf = CN->getValueAPF();
1493       EltBits = (CN->getValueType(0) == MVT::f32
1494                  ? FloatToBits(apf.convertToFloat())
1495                  : DoubleToBits(apf.convertToDouble()));
1497       // Nonconstant element.
1501     VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1504   //printf("%llx %llx %llx %llx\n",
1505   //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1509 /// If this is a splat (repetition) of a value across the whole vector, return
1510 /// the smallest size that splats it. For example, "0x01010101010101..." is a
1511 /// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1512 /// SplatSize = 1 byte.
1513 static bool isConstantSplat(const uint64_t Bits128[2],
1514                             const uint64_t Undef128[2],
1516                             uint64_t &SplatBits, uint64_t &SplatUndef,
1518   // Don't let undefs prevent splats from matching. See if the top 64-bits are
1519   // the same as the lower 64-bits, ignoring undefs.
// Fold the 128-bit value down by halves: OR the data bits (an undef half
// contributes zeros), AND the undef masks (a bit is undef only if undef in
// both halves).
1520   uint64_t Bits64 = Bits128[0] | Bits128[1];
1521   uint64_t Undef64 = Undef128[0] & Undef128[1];
1522   uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1523   uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1524   uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1525   uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
// Each nested level: the two halves must agree wherever neither is undef,
// and MinSplatBits bounds how small a splat the caller will accept.
1527   if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1528     if (MinSplatBits < 64) {
1530       // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1532       if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1533         if (MinSplatBits < 32) {
1535           // If the top 16-bits are different than the lower 16-bits, ignoring
1536           // undefs, we have an i32 splat.
1537           if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1538             if (MinSplatBits < 16) {
1539               // If the top 8-bits are different than the lower 8-bits, ignoring
1540               // undefs, we have an i16 splat.
1541               if ((Bits16 & (uint16_t(~Undef16) >> 8))
1542                   == ((Bits16 >> 8) & ~Undef16)) {
1543                 // Otherwise, we have an 8-bit splat.
1544                 SplatBits  = uint8_t(Bits16)  | uint8_t(Bits16 >> 8);
1545                 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
1551                 SplatUndef = Undef16;
1558             SplatUndef = Undef32;
1564       SplatBits = Bits128[0];
1565       SplatUndef = Undef128[0];
1571   return false;  // Can't be a splat if two pieces don't match.
1574 // If this is a case we can't handle, return null and let the default
1575 // expansion code take care of it. If we CAN select this case, and if it
1576 // selects to a single instruction, return Op. Otherwise, if we can codegen
1577 // this case more efficiently than a constant pool load, lower it to the
1578 // sequence of ops that should be used.
1579 static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1580   MVT VT = Op.getValueType();
1581   // If this is a vector of constants or undefs, get the bits. A bit in
1582   // UndefBits is set if the corresponding element of the vector is an
1583   // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1585   uint64_t VectorBits[2];
1586   uint64_t UndefBits[2];
1587   uint64_t SplatBits, SplatUndef;
1589   if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
1590       || !isConstantSplat(VectorBits, UndefBits,
1591                           VT.getVectorElementType().getSizeInBits(),
1592                           SplatBits, SplatUndef, SplatSize))
1593     return SDValue();   // Not a constant vector, not a splat.
// Each case below re-expresses the splat in a form the SPU can materialize.
1595   switch (VT.getSimpleVT()) {
1598     uint32_t Value32 = SplatBits;
1599     assert(SplatSize == 4
1600            && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1601     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1602     SDValue T = DAG.getConstant(Value32, MVT::i32);
1603     return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1604                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
1608     uint64_t f64val = SplatBits;
1609     assert(SplatSize == 8
1610            && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
1611     // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1612     SDValue T = DAG.getConstant(f64val, MVT::i64);
1613     return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1614                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1618     // 8-bit constants have to be expanded to 16-bits
1619     unsigned short Value16 = SplatBits | (SplatBits << 8);
1621     for (int i = 0; i < 8; ++i)
1622       Ops[i] = DAG.getConstant(Value16, MVT::i16);
1623     return DAG.getNode(ISD::BIT_CONVERT, VT,
1624                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
1627     unsigned short Value16;
// Widen a 1-byte splat to 16 bits by duplicating it; 2-byte splats pass
// through unchanged.
1629       Value16 = (unsigned short) (SplatBits & 0xffff);
1631       Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1632     SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
1634     for (int i = 0; i < 8; ++i) Ops[i] = T;
1635     return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1638     unsigned int Value = SplatBits;
1639     SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
1640     return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
1643     uint64_t val = SplatBits;
1644     uint32_t upper = uint32_t(val >> 32);
1645     uint32_t lower = uint32_t(val);
1647     if (upper == lower) {
1648       // Magic constant that can be matched by IL, ILA, et. al.
1649       SDValue Val = DAG.getTargetConstant(val, MVT::i64);
1650       return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
1654       SmallVector<SDValue, 16> ShufBytes;
1656       bool upper_special, lower_special;
1658       // NOTE: This code creates common-case shuffle masks that can be easily
1659       // detected as common expressions. It is not attempting to create highly
1660       // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1662       // Detect if the upper or lower half is a special shuffle mask pattern:
// "Special" halves (0, ~0, sign bit) can be produced directly by shufb
// control bytes, so no source vector is needed for them.
1663       upper_special = (upper == 0||upper == 0xffffffff||upper == 0x80000000);
1664       lower_special = (lower == 0||lower == 0xffffffff||lower == 0x80000000);
1666       // Create lower vector if not a special pattern
1667       if (!lower_special) {
1668         SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1669         LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1670                            DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1671                                        LO32C, LO32C, LO32C, LO32C));
1674       // Create upper vector if not a special pattern
1675       if (!upper_special) {
1676         SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1677         HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1678                            DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1679                                        HI32C, HI32C, HI32C, HI32C));
1682       // If either upper or lower are special, then the two input operands are
1683       // the same (basically, one of them is a "don't care")
1688       if (lower_special && upper_special) {
1689         // Unhappy situation... both upper and lower are special, so punt with
1690         // a target constant:
1691         SDValue Zero = DAG.getConstant(0, MVT::i32);
1692         HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
// Build the 16 shufb control bytes: even words (i & 1 == 0) pick from the
// upper half, odd words from the lower half.
1696       for (int i = 0; i < 4; ++i) {
1698         for (int j = 0; j < 4; ++j) {
1700           bool process_upper, process_lower;
1702           process_upper = (upper_special && (i & 1) == 0);
1703           process_lower = (lower_special && (i & 1) == 1);
1705           if (process_upper || process_lower) {
// shufb control encodings: 0x80 = 0x00 byte, 0xc0 = 0xff byte,
// 0xe0 = 0x80 byte (first byte only for the sign-bit pattern).
1706             if ((process_upper && upper == 0)
1707                 || (process_lower && lower == 0))
1709             else if ((process_upper && upper == 0xffffffff)
1710                      || (process_lower && lower == 0xffffffff))
1712             else if ((process_upper && upper == 0x80000000)
1713                      || (process_lower && lower == 0x80000000))
1714               val |= (j == 0 ? 0xe0 : 0x80);
1716             val |= i * 4 + j + ((i & 1) * 16);
1719           ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1722       return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1723                          DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1724                                      &ShufBytes[0], ShufBytes.size()));
1732 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1733 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1734 /// permutation vector, V3, is monotonically increasing with one "exception"
1735 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1736 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1737 /// In either case, the net result is going to eventually invoke SHUFB to
1738 /// permute/shuffle the bytes from V1 and V2.
1740 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate
1741 /// control word for byte/halfword/word insertion. This takes care of a single
1742 /// element move from V2 into V1.
1744 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1745 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1746   SDValue V1 = Op.getOperand(0);
1747   SDValue V2 = Op.getOperand(1);
1748   SDValue PermMask = Op.getOperand(2);
1750   if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1752   // If we have a single element being moved from V1 to V2, this can be handled
1753   // using the C*[DX] compute mask instructions, but the vector elements have
1754   // to be monotonically increasing with one exception element.
1756   MVT VecVT = V1.getValueType();
1757   MVT EltVT = VecVT.getVectorElementType();
1758   unsigned EltsFromV2 = 0;
1759   unsigned V2EltIdx0 = 0;
1760   unsigned CurrElt = 0;
1761   unsigned MaxElts = VecVT.getVectorNumElements();
1762   unsigned PrevElt = 0;
1764   bool monotonic = true;
// V2EltIdx0 is the first mask index that refers to V2 rather than V1; it is
// set per element type in the (elided) branches below.
1767   if (EltVT == MVT::i8) {
1769   } else if (EltVT == MVT::i16) {
1771   } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
1773   } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
1776     assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
// Scan the permutation mask, tracking (a) whether exactly one element comes
// from V2 (single-element insert) and (b) whether the V1 indices form a
// rotation pattern.
1778   for (unsigned i = 0; i != PermMask.getNumOperands(); ++i) {
1779     if (PermMask.getOperand(i).getOpcode() != ISD::UNDEF) {
1780       unsigned SrcElt = cast<ConstantSDNode > (PermMask.getOperand(i))->getZExtValue();
1783       if (SrcElt >= V2EltIdx0) {
1784         if (1 >= (++EltsFromV2)) {
1785           V2Elt = (V2EltIdx0 - SrcElt) << 2;
1787       } else if (CurrElt != SrcElt) {
// Rotation check: consecutive indices, allowing one wrap from the last
// element back to 0.
1795         if (PrevElt > 0 && SrcElt < MaxElts) {
1796           if ((PrevElt == SrcElt - 1)
1797               || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
1804         } else if (PrevElt == 0) {
1805           // First time through, need to keep track of previous element
1808           // This isn't a rotation, takes elements from vector 2
1815   if (EltsFromV2 == 1 && monotonic) {
1816     // Compute mask and shuffle
1817     MachineFunction &MF = DAG.getMachineFunction();
1818     MachineRegisterInfo &RegInfo = MF.getRegInfo();
1819     unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1820     MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1821     // Initialize temporary register to 0
1822     SDValue InitTempReg =
1823       DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1824     // Copy register's contents as index in SHUFFLE_MASK:
1825     SDValue ShufMaskOp =
1826       DAG.getNode(SPUISD::SHUFFLE_MASK, MVT::v4i32,
1827                   DAG.getTargetConstant(V2Elt, MVT::i32),
1828                   DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1829     // Use shuffle mask in SHUFB synthetic instruction:
1830     return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1831   } else if (rotate) {
// Rotation amount is measured in bytes for ROTBYTES_LEFT.
1832     int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
1834     return DAG.getNode(SPUISD::ROTBYTES_LEFT, V1.getValueType(),
1835                        V1, DAG.getConstant(rotamt, MVT::i16));
1837     // Convert the SHUFFLE_VECTOR mask's input element units to the
// General case: expand the element-granular mask to a byte-granular v16i8
// shufb control vector.
1839     unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1841     SmallVector<SDValue, 16> ResultMask;
1842     for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1844       if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1847         SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1849       for (unsigned j = 0; j < BytesPerElement; ++j) {
1850         ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1855     SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1856                                     &ResultMask[0], ResultMask.size());
1857     return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
// Lower SCALAR_TO_VECTOR: a constant scalar becomes an explicit
// BUILD_VECTOR of the value repeated across every lane (later folded into a
// vector load); a non-constant scalar is promoted into the vector's
// preferred slot via SPUISD::PROMOTE_SCALAR.
1861 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1862   SDValue Op0 = Op.getOperand(0);                     // Op0 = the scalar
1864   if (Op0.getNode()->getOpcode() == ISD::Constant) {
1865     // For a constant, build the appropriate constant vector, which will
1866     // eventually simplify to a vector register load.
1868     ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1869     SmallVector<SDValue, 16> ConstVecValues;
1873     // Create a constant vector:
// n_copies and the element VT are chosen from the result vector type.
1874     switch (Op.getValueType().getSimpleVT()) {
1875     default: assert(0 && "Unexpected constant value type in "
1876                          "LowerSCALAR_TO_VECTOR");
1877     case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1878     case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1879     case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1880     case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1881     case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1882     case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1885     SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1886     for (size_t j = 0; j < n_copies; ++j)
1887       ConstVecValues.push_back(CValue);
1889     return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1890                        &ConstVecValues[0], ConstVecValues.size());
1892     // Otherwise, copy the value from one register to another:
1893     switch (Op0.getValueType().getSimpleVT()) {
1894     default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1901       return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
//! Lower vector multiplication (v4i32, v8i16, v16i8).
// SPU has no full-width vector multiply, so each element width is decomposed
// into partial products (MPYH/MPYU/MPY) and recombined.
1908 static SDValue LowerVectorMUL(SDValue Op, SelectionDAG &DAG) {
1909   switch (Op.getValueType().getSimpleVT()) {
1911     cerr << "CellSPU: Unknown vector multiplication, got "
1912          << Op.getValueType().getMVTString()
// v4i32: rA*rB = MPYU(rA,rB) + MPYH(rA,rB) + MPYH(rB,rA)
// (low 16x16 products plus both cross high-half partial products).
1918     SDValue rA = Op.getOperand(0);
1919     SDValue rB = Op.getOperand(1);
1920     SDValue HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1921     SDValue HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1922     SDValue LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1923     SDValue Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1925     return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1929     // Multiply two v8i16 vectors (pipeline friendly version):
1930     // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1931     // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1932     // c) Use SELB to select upper and lower halves from the intermediate results
1934     // NOTE: We really want to move the SELECT_MASK to earlier to actually get the
1935     // dual-issue. This code does manage to do this, even if it's a little on
1938     MachineFunction &MF = DAG.getMachineFunction();
1939     MachineRegisterInfo &RegInfo = MF.getRegInfo();
// NOTE(review): Chain and rA both read operand 0 — ISD::MUL has no chain
// operand, so Chain here is actually the first multiplicand. Presumably this
// should be DAG.getEntryNode(); verify against the FDIVf32 lowering below.
1940     SDValue Chain = Op.getOperand(0);
1941     SDValue rA = Op.getOperand(0);
1942     SDValue rB = Op.getOperand(1);
// Virtual registers pin the FSMBI select mask and the high product so the
// scheduler can hoist them (see the dual-issue note above).
1943     unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1944     unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1947       DAG.getCopyToReg(Chain, FSMBIreg,
1948                        DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1949                                    DAG.getConstant(0xcccc, MVT::i16)));
1952       DAG.getCopyToReg(FSMBOp, HiProdReg,
1953                        DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1955     SDValue HHProd_v4i32 =
1956       DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1957                   DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
// Select between the low product (MPY) and the shifted high product per the
// 0xcccc byte mask.
1959     return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1960                        DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1961                        DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1962                                    DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1964                                                DAG.getConstant(16, MVT::i16))),
1965                        DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1968     // This M00sE is N@stI! (apologies to Monty Python)
1970     // SPU doesn't know how to do any 8-bit multiplication, so the solution
1971     // is to break it all apart, sign extend, and reassemble the various
1972     // intermediate products.
1974     SDValue rA = Op.getOperand(0);
1975     SDValue rB = Op.getOperand(1);
1976     SDValue c8 = DAG.getConstant(8, MVT::i32);
1977     SDValue c16 = DAG.getConstant(16, MVT::i32);
// Low-low byte products, computed as 16-bit multiplies.
1980       DAG.getNode(SPUISD::MPY, MVT::v8i16,
1981                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1982                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
// Arithmetic-shift by 8 sign-extends each odd byte into a halfword lane.
1984     SDValue rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1986     SDValue rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1989       DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1990                   DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1992     SDValue FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1993                                    DAG.getConstant(0x2222, MVT::i16));
1995     SDValue LoProdParts =
1996       DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1997                   DAG.getNode(SPUISD::SELB, MVT::v8i16,
1998                               LLProd, LHProd, FSMBmask));
// Keep only the low 16 bits of each 32-bit lane of the combined low product.
2000     SDValue LoProdMask = DAG.getConstant(0xffff, MVT::i32);
2003       DAG.getNode(ISD::AND, MVT::v4i32,
2005                   DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2006                               LoProdMask, LoProdMask,
2007                               LoProdMask, LoProdMask));
// rAH/rBH: upper halves of each word, sign-extended down by 16.
2010       DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
2011                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
2014       DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
2015                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
2018       DAG.getNode(SPUISD::MPY, MVT::v8i16,
2019                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
2020                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
2023       DAG.getNode(SPUISD::MPY, MVT::v8i16,
2024                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2025                               DAG.getNode(SPUISD::VEC_SRA,
2026                                           MVT::v4i32, rAH, c8)),
2027                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2028                               DAG.getNode(SPUISD::VEC_SRA,
2029                                           MVT::v4i32, rBH, c8)));
// Merge the two high-byte partial products, then shift into position and OR
// with the masked low product to form the final v16i8 result.
2032       DAG.getNode(SPUISD::SELB, MVT::v8i16,
2034                   DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
2038       DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);
2040     return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2041                        DAG.getNode(ISD::OR, MVT::v4i32,
//! Lower f32 (and v4f32) FDIV via reciprocal estimate plus one refinement.
// Computes A/B as: BRcpl = fi(B, frest(B)); C = A*BRcpl;
// result = C + BRcpl*(A - B*C) — a single Newton-Raphson-style correction.
2049 static SDValue LowerFDIVf32(SDValue Op, SelectionDAG &DAG) {
2050   MachineFunction &MF = DAG.getMachineFunction();
2051   MachineRegisterInfo &RegInfo = MF.getRegInfo();
2053   SDValue A = Op.getOperand(0);
2054   SDValue B = Op.getOperand(1);
2055   MVT VT = Op.getValueType();
2057   unsigned VRegBR, VRegC;
// Scalar f32 uses the FP register class; otherwise (vector case) the
// quadword vector class.
2059   if (VT == MVT::f32) {
2060     VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2061     VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2063     VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2064     VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2066   // TODO: make sure we're feeding FPInterp the right arguments
2067   // Right now: fi B, frest(B)
2070   // (Floating Interpolate (FP Reciprocal Estimate B))
2072     DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2073                      DAG.getNode(SPUISD::FPInterp, VT, B,
2074                                  DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2076   // Computes A * BRcpl and stores in a temporary register
2078     DAG.getCopyToReg(BRcpl, VRegC,
2079                      DAG.getNode(ISD::FMUL, VT, A,
2080                                  DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2081   // What's the Chain variable do? It's magic!
2082   // TODO: set Chain = Op(0).getEntryNode()
// Final correction term: C + BRcpl * (A - B*C).
2084   return DAG.getNode(ISD::FADD, VT,
2085                      DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2086                      DAG.getNode(ISD::FMUL, VT,
2087                                  DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2088                                  DAG.getNode(ISD::FSUB, VT, A,
2089                                              DAG.getNode(ISD::FMUL, VT, B,
2090                                                          DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
//! Lower EXTRACT_VECTOR_ELT.
// Constant index: build a shufb mask that moves the requested element into
// the register's preferred slot, then VEC2PREFSLOT extracts it.
// Variable index: shift the element to byte 0, replicate it across the
// vector, and extract from the preferred slot.
2093 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2094   MVT VT = Op.getValueType();
2095   SDValue N = Op.getOperand(0);
2096   SDValue Elt = Op.getOperand(1);
2099   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
2100     // Constant argument:
2101     int EltNo = (int) C->getZExtValue();
// Bounds checks per element width (16/8/4/2 lanes in a 128-bit register).
// NOTE(review): the i32/i64 assert messages say "> 4"/"> 2" but the checks
// fire at slot >= 4 / >= 2; the wording is off by one (cf. the i8/i16 cases).
2104     if (VT == MVT::i8 && EltNo >= 16)
2105       assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2106     else if (VT == MVT::i16 && EltNo >= 8)
2107       assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2108     else if (VT == MVT::i32 && EltNo >= 4)
2109       assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
2110     else if (VT == MVT::i64 && EltNo >= 2)
2111       assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
2113     if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2114       // i32 and i64: Element 0 is the preferred slot
2115       return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, N);
2118     // Need to generate shuffle mask and extract:
2119     int prefslot_begin = -1, prefslot_end = -1;
2120     int elt_byte = EltNo * VT.getSizeInBits() / 8;
// Preferred-slot byte range depends on the scalar width (per the SPU ABI's
// preferred-slot convention for each type).
2122     switch (VT.getSimpleVT()) {
2124       assert(false && "Invalid value type!");
2126       prefslot_begin = prefslot_end = 3;
2130       prefslot_begin = 2; prefslot_end = 3;
2135       prefslot_begin = 0; prefslot_end = 3;
2140       prefslot_begin = 0; prefslot_end = 7;
2145     assert(prefslot_begin != -1 && prefslot_end != -1 &&
2146            "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
// Build the 16-byte shuffle pattern: element bytes land in the preferred
// slot; remaining bytes repeat the pattern (don't-care outside the slot).
2148     unsigned int ShufBytes[16];
2149     for (int i = 0; i < 16; ++i) {
2150       // zero fill uppper part of preferred slot, don't care about the
2152       unsigned int mask_val;
2153       if (i <= prefslot_end) {
2155           ((i < prefslot_begin)
2157            : elt_byte + (i - prefslot_begin));
2159         ShufBytes[i] = mask_val;
2161         ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
// Pack the 16 shuffle bytes into four i32 constants for the mask vector.
2164     SDValue ShufMask[4];
2165     for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
2166       unsigned bidx = i * 4;
2167       unsigned int bits = ((ShufBytes[bidx] << 24) |
2168                            (ShufBytes[bidx+1] << 16) |
2169                            (ShufBytes[bidx+2] << 8) |
2171       ShufMask[i] = DAG.getConstant(bits, MVT::i32);
2174     SDValue ShufMaskVec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2176                                       sizeof(ShufMask) / sizeof(ShufMask[0]));
2178     retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2179                          DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2180                                      N, N, ShufMaskVec));
2182     // Variable index: Rotate the requested element into slot 0, then replicate
2183     // slot 0 across the vector
2184     MVT VecVT = N.getValueType();
2185     if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
2186       cerr << "LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit vector type!\n";
2190     // Make life easier by making sure the index is zero-extended to i32
2191     if (Elt.getValueType() != MVT::i32)
2192       Elt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Elt);
2194     // Scale the index to a bit/byte shift quantity
// 16 bytes / #lanes = bytes per element; logBase2 gives the shift amount.
2196       APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
2197     unsigned scaleShift = scaleFactor.logBase2();
2200     if (scaleShift > 0) {
2201       // Scale the shift factor:
2202       Elt = DAG.getNode(ISD::SHL, MVT::i32, Elt,
2203                         DAG.getConstant(scaleShift, MVT::i32));
2206     vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT, N, Elt);
2208     // Replicate the bytes starting at byte 0 across the entire vector (for
2209     // consistency with the notion of a unified register set)
// Per-type byte-replication shuffle constants (byte indices 0..N-1 repeated).
2212     switch (VT.getSimpleVT()) {
2214       cerr << "LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector type\n";
2218       SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
2219       replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2224       SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2225       replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2231       SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2232       replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2238       SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2239       SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2240       replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, loFactor, hiFactor,
2241                               loFactor, hiFactor);
2246     retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2247                          DAG.getNode(SPUISD::SHUFB, VecVT,
2248                                      vecShift, vecShift, replicate));
//! Lower INSERT_VECTOR_ELT (constant index only).
// Forms an insertion mask via SHUFFLE_MASK addressed off the stack pointer
// ($sp is always 16-byte aligned), then shufb-merges the scalar into the
// vector.
2254 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2255   SDValue VecOp = Op.getOperand(0);
2256   SDValue ValOp = Op.getOperand(1);
2257   SDValue IdxOp = Op.getOperand(2);
2258   MVT VT = Op.getValueType();
// Only constant indices are supported by this lowering.
2260   ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2261   assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2263   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2264   // Use $sp ($1) because it's always 16-byte aligned and it's available:
2265   SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
2266                                 DAG.getRegister(SPU::R1, PtrVT),
2267                                 DAG.getConstant(CN->getSExtValue(), PtrVT));
2268   SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, VT, Pointer);
// shufb(scalar-vector, original-vector, mask): mask selects the new element
// at the insertion position and the old vector elsewhere.
2271     DAG.getNode(SPUISD::SHUFB, VT,
2272                 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2274                 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, ShufMask));
//! Lower i8 arithmetic by promoting to i16.
// SPU has no native i8 arithmetic: each operand is sign- or zero-extended to
// i16 (constants are re-materialized at i16 directly), the operation is done
// at i16, and the result is truncated back to i8. Opc is the i8 operation
// being lowered.
2279 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2281   SDValue N0 = Op.getOperand(0);      // Everything has at least one operand
2283   assert(Op.getValueType() == MVT::i8);
2286     assert(0 && "Unhandled i8 math operator");
2290     // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2292     SDValue N1 = Op.getOperand(1);
2293     N0 = (N0.getOpcode() != ISD::Constant
2294           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2295           : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2297     N1 = (N1.getOpcode() != ISD::Constant
2298           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2299           : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2301     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2302                        DAG.getNode(Opc, MVT::i16, N0, N1));
// Next case: zero-extend N0; the shift/rotate amount N1 is widened or
// truncated to i32 as needed.
2306     SDValue N1 = Op.getOperand(1);
2308     N0 = (N0.getOpcode() != ISD::Constant
2309           ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2310           : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2312     N1Opc = N1.getValueType().bitsLT(MVT::i32)
2315     N1 = (N1.getOpcode() != ISD::Constant
2316           ? DAG.getNode(N1Opc, MVT::i32, N1)
2317           : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
// ExpandArg = N0 | (N0 << 8): duplicates the byte into both halves of the
// i16 — presumably so a rotate at i16 behaves like an i8 rotate; TODO confirm
// against the elided case label.
2320       DAG.getNode(ISD::OR, MVT::i16, N0,
2321                   DAG.getNode(ISD::SHL, MVT::i16,
2322                               N0, DAG.getConstant(8, MVT::i32)));
2323     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2324                        DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
// Unsigned variant: zero-extend both operands to i16.
2328     SDValue N1 = Op.getOperand(1);
2330     N0 = (N0.getOpcode() != ISD::Constant
2331           ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2332           : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2334     N1Opc = N1.getValueType().bitsLT(MVT::i16)
2337     N1 = (N1.getOpcode() != ISD::Constant
2338           ? DAG.getNode(N1Opc, MVT::i16, N1)
2339           : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2341     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2342                        DAG.getNode(Opc, MVT::i16, N0, N1));
// Signed variant: sign-extend both operands to i16.
2345     SDValue N1 = Op.getOperand(1);
2347     N0 = (N0.getOpcode() != ISD::Constant
2348           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2349           : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2351     N1Opc = N1.getValueType().bitsLT(MVT::i16)
2354     N1 = (N1.getOpcode() != ISD::Constant
2355           ? DAG.getNode(N1Opc, MVT::i16, N1)
2356           : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2358     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2359                        DAG.getNode(Opc, MVT::i16, N0, N1));
// Final signed case (e.g. multiply); same promotion pattern.
2362     SDValue N1 = Op.getOperand(1);
2364     N0 = (N0.getOpcode() != ISD::Constant
2365           ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2366           : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2368     N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2369     N1 = (N1.getOpcode() != ISD::Constant
2370           ? DAG.getNode(N1Opc, MVT::i16, N1)
2371           : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2373     return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2374                        DAG.getNode(Opc, MVT::i16, N0, N1));
//! Lower i64 operations that SPU cannot do natively.
// Each case promotes the scalar into a 128-bit vector, performs the work with
// quadword shuffles/shifts/selects, and extracts the result from the
// preferred slot. Opc is the original i64 operation.
2382 static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2384   MVT VT = Op.getValueType();
2385   MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2387   SDValue Op0 = Op.getOperand(0);
2390   case ISD::ZERO_EXTEND:
2391   case ISD::SIGN_EXTEND:
2392   case ISD::ANY_EXTEND: {
2393     MVT Op0VT = Op0.getValueType();
2394     MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2396     assert(Op0VT == MVT::i32
2397            && "CellSPU: Zero/sign extending something other than i32");
2399     DEBUG(cerr << "CellSPU.LowerI64Math: lowering zero/sign/any extend\n");
2401     SDValue PromoteScalar =
2402       DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
2404     if (Opc != ISD::SIGN_EXTEND) {
2405       // Use a shuffle to zero extend the i32 to i64 directly:
// 0x80-pattern bytes produce zeros in shufb; the other words select the
// source i32 into the low word of each doubleword.
2407         DAG.getNode(ISD::BUILD_VECTOR, Op0VecVT,
2408                     DAG.getConstant(0x80808080, MVT::i32),
2409                     DAG.getConstant(0x00010203, MVT::i32),
2410                     DAG.getConstant(0x80808080, MVT::i32),
2411                     DAG.getConstant(0x08090a0b, MVT::i32));
2412       SDValue zextShuffle =
2413         DAG.getNode(SPUISD::SHUFB, Op0VecVT,
2414                     PromoteScalar, PromoteScalar, shufMask);
2416       return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2417                          DAG.getNode(ISD::BIT_CONVERT, VecVT, zextShuffle));
2419     // SPU has no "rotate quadword and replicate bit 0" (i.e. rotate/shift
2420     // right and propagate the sign bit) instruction.
// Sign extend: rotate the value down 4 bytes, compute a sign word with a
// 32-bit arithmetic shift, and SELB the two together under an 0xf0f0 mask.
2422       DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, Op0VecVT,
2423                   PromoteScalar, DAG.getConstant(4, MVT::i32));
2425       DAG.getNode(SPUISD::VEC_SRA, Op0VecVT,
2426                   PromoteScalar, DAG.getConstant(32, MVT::i32));
2428       DAG.getNode(SPUISD::SELECT_MASK, Op0VecVT,
2429                   DAG.getConstant(0xf0f0, MVT::i16));
2430     SDValue CombineQuad =
2431       DAG.getNode(SPUISD::SELB, Op0VecVT,
2432                   SignQuad, RotQuad, SelMask);
2434     return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2435                        DAG.getNode(ISD::BIT_CONVERT, VecVT, CombineQuad));
// i64 ADD: generate the carry word, shuffle it up one 32-bit slot, then use
// the extended-add node.
2440     // Turn operands into vectors to satisfy type checking (shufb works on
2443       DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2445       DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2446     SmallVector<SDValue, 16> ShufBytes;
2448     // Create the shuffle mask for "rotating" the borrow up one register slot
2449     // once the borrow is generated.
2450     ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2451     ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2452     ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2453     ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2456       DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
2457     SDValue ShiftedCarry =
2458       DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2460                   DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2461                               &ShufBytes[0], ShufBytes.size()));
2463     return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
2464                        DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
2465                                    Op0, Op1, ShiftedCarry));
// i64 SUB: same shape using borrow generate + extended-subtract.
2469     // Turn operands into vectors to satisfy type checking (shufb works on
2472       DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2474       DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2475     SmallVector<SDValue, 16> ShufBytes;
2477     // Create the shuffle mask for "rotating" the borrow up one register slot
2478     // once the borrow is generated.
2479     ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
// NOTE(review): the borrow mask uses 0xc0c0c0c0 where the ADD case uses
// 0x80808080. In shufb these select different fill bytes — presumably
// intentional for borrow semantics, but worth verifying against the SPU ISA.
2480     ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2481     ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2482     ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2485       DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
2486     SDValue ShiftedBorrow =
2487       DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2488                   BorrowGen, BorrowGen,
2489                   DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2490                               &ShufBytes[0], ShufBytes.size()));
2492     return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
2493                        DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
2494                                    Op0, Op1, ShiftedBorrow));
// i64 SHL: mask off the low doubleword, then do quadword byte + bit shifts
// (shift amount split as amt/8 bytes and amt%8 bits).
2498     SDValue ShiftAmt = Op.getOperand(1);
2499     MVT ShiftAmtVT = ShiftAmt.getValueType();
2500     SDValue Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0);
2502       DAG.getNode(SPUISD::SELB, VecVT,
2504                   DAG.getConstant(0, VecVT),
2505                   DAG.getNode(SPUISD::SELECT_MASK, VecVT,
2506                               DAG.getConstant(0xff00ULL, MVT::i16)));
2507     SDValue ShiftAmtBytes =
2508       DAG.getNode(ISD::SRL, ShiftAmtVT,
2510                   DAG.getConstant(3, ShiftAmtVT));
2511     SDValue ShiftAmtBits =
2512       DAG.getNode(ISD::AND, ShiftAmtVT,
2514                   DAG.getConstant(7, ShiftAmtVT));
2516     return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2517                        DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
2518                                    DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
2519                                                MaskLower, ShiftAmtBytes),
// i64 SRL: quadword rotate-right-and-zero by bytes then bits.
2524     MVT VT = Op.getValueType();
2525     SDValue ShiftAmt = Op.getOperand(1);
2526     MVT ShiftAmtVT = ShiftAmt.getValueType();
2527     SDValue ShiftAmtBytes =
2528       DAG.getNode(ISD::SRL, ShiftAmtVT,
2530                   DAG.getConstant(3, ShiftAmtVT));
2531     SDValue ShiftAmtBits =
2532       DAG.getNode(ISD::AND, ShiftAmtVT,
2534                   DAG.getConstant(7, ShiftAmtVT));
2536     return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
2537                        DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
2538                                    Op0, ShiftAmtBytes),
// i64 SRA: implemented as a left-rotate by the negated amount, with the sign
// replicated into the upper half via SELECT_MASK/SELB first.
2543     // Promote Op0 to vector
2545       DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2546     SDValue ShiftAmt = Op.getOperand(1);
2547     MVT ShiftVT = ShiftAmt.getValueType();
2549     // Negate variable shift amounts
2550     if (!isa<ConstantSDNode>(ShiftAmt)) {
2551       ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT,
2552                              DAG.getConstant(0, ShiftVT), ShiftAmt);
2555     SDValue UpperHalfSign =
2556       DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i32,
2557                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
2558                               DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
2559                                           Op0, DAG.getConstant(31, MVT::i32))));
2560     SDValue UpperHalfSignMask =
2561       DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign);
2562     SDValue UpperLowerMask =
2563       DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64,
2564                   DAG.getConstant(0xff00, MVT::i16));
2565     SDValue UpperLowerSelect =
2566       DAG.getNode(SPUISD::SELB, MVT::v2i64,
2567                   UpperHalfSignMask, Op0, UpperLowerMask);
2568     SDValue RotateLeftBytes =
2569       DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64,
2570                   UpperLowerSelect, ShiftAmt);
2571     SDValue RotateLeftBits =
2572       DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
2573                   RotateLeftBytes, ShiftAmt);
2575     return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
2583 //! Lower byte immediate operations for v16i8 vectors:
// If one operand of a v16i8 logical op is a constant splat BUILD_VECTOR
// (possibly behind a BIT_CONVERT), rebuild it from target constants so the
// byte-immediate instruction forms (ANDBI/ORBI/XORBI) can be selected.
2585 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2588   MVT VT = Op.getValueType();
// Try operand 0 as the constant vector first; fall back to operand 1.
2590   ConstVec = Op.getOperand(0);
2591   Arg = Op.getOperand(1);
2592   if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2593     if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2594       ConstVec = ConstVec.getOperand(0);
2596       ConstVec = Op.getOperand(1);
2597       Arg = Op.getOperand(0);
2598       if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2599         ConstVec = ConstVec.getOperand(0);
2604   if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2605     uint64_t VectorBits[2];
2606     uint64_t UndefBits[2];
2607     uint64_t SplatBits, SplatUndef;
// Only rewrite when the vector is a uniform splat whose element fits the
// immediate field (low 8 bits used below).
2610     if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
2611         && isConstantSplat(VectorBits, UndefBits,
2612                            VT.getVectorElementType().getSizeInBits(),
2613                            SplatBits, SplatUndef, SplatSize)) {
2615       SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2616       const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2618       // Turn the BUILD_VECTOR into a set of target constants:
2619       for (size_t i = 0; i < tcVecSize; ++i)
2622       return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
2623                          DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2626   // These operations (AND, OR, XOR) are legal, they just couldn't be custom
2627   // lowered. Return the operation, rather than a null SDValue.
2631 //! Lower i32 multiplication
// Scalar i32 multiply via the same identity as the v4i32 vector case:
// rA*rB = MPYU(rA,rB) + MPYH(rA,rB) + MPYH(rB,rA), since the SPU multiplier
// only handles 16x16 -> 32 products natively.
2632 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG, MVT VT,
2634   switch (VT.getSimpleVT()) {
2636     cerr << "CellSPU: Unknown LowerMUL value type, got "
2637          << Op.getValueType().getMVTString()
2643     SDValue rA = Op.getOperand(0);
2644     SDValue rB = Op.getOperand(1);
2646     return DAG.getNode(ISD::ADD, MVT::i32,
2647                        DAG.getNode(ISD::ADD, MVT::i32,
2648                                    DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2649                                    DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2650                        DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2657 //! Custom lowering for CTPOP (count population)
2659   Custom lowering code that counts the number ones in the input
2660   operand. SPU has such an instruction, but it counts the number of
2661   ones per byte, which then have to be accumulated.
2663 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2664   MVT VT = Op.getValueType();
2665   MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2667   switch (VT.getSimpleVT()) {
2669     assert(false && "Invalid value type!");
// i8: a single CNTB of the promoted scalar is already the answer.
2671     SDValue N = Op.getOperand(0);
2672     SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2674     SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2675     SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2677     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
// i16: add the two per-byte counts ((x >> 8) + x) and mask to 4 bits
// (max popcount of 16 bits is 16, Mask0 = 0x0f keeps the low nibble of
// each byte's contribution after the fold).
2681     MachineFunction &MF = DAG.getMachineFunction();
2682     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2684     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2686     SDValue N = Op.getOperand(0);
2687     SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2688     SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2689     SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2691     SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2692     SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2694     // CNTB_result becomes the chain to which all of the virtual registers
2695     // CNTB_reg, SUM1_reg become associated:
2696     SDValue CNTB_result =
2697       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2699     SDValue CNTB_rescopy =
2700       DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2702     SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2704     return DAG.getNode(ISD::AND, MVT::i16,
2705                        DAG.getNode(ISD::ADD, MVT::i16,
2706                                    DAG.getNode(ISD::SRL, MVT::i16,
// i32: two fold steps — add the 16-bit halves, then the 8-bit halves of the
// sum — and mask with 0xff for the final count.
2713     MachineFunction &MF = DAG.getMachineFunction();
2714     MachineRegisterInfo &RegInfo = MF.getRegInfo();
2716     unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2717     unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2719     SDValue N = Op.getOperand(0);
2720     SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2721     SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2722     SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2723     SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2725     SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2726     SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2728     // CNTB_result becomes the chain to which all of the virtual registers
2729     // CNTB_reg, SUM1_reg become associated:
2730     SDValue CNTB_result =
2731       DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2733     SDValue CNTB_rescopy =
2734       DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2737       DAG.getNode(ISD::SRL, MVT::i32,
2738                   DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2741       DAG.getNode(ISD::ADD, MVT::i32,
2742                   Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2744     SDValue Sum1_rescopy =
2745       DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2748       DAG.getNode(ISD::SRL, MVT::i32,
2749                   DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2752       DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2753                   DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2755     return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2765 //! Lower ISD::SELECT_CC
2767   ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
2770   \note Need to revisit this in the future: if the code path through the true
2771   and false value computations is longer than the latency of a branch (6
2772   cycles), then it would be more advantageous to branch and insert a new basic
2773   block and branch on the condition. However, this code does not make that
2774   assumption, given the simplisitc uses so far.
2777 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
2778   MVT VT = Op.getValueType();
2779   SDValue lhs = Op.getOperand(0);
2780   SDValue rhs = Op.getOperand(1);
2781   SDValue trueval = Op.getOperand(2);
2782   SDValue falseval = Op.getOperand(3);
2783   SDValue condition = Op.getOperand(4);
2785   // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2786   // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2787   // with another "cannot select select_cc" assert:
// Lower as (selb trueval, falseval, setcc(lhs, rhs, cc)) — a branchless
// per-bit select using the comparison result as the mask.
2789   SDValue compare = DAG.getNode(ISD::SETCC, VT, lhs, rhs, condition);
2790   return DAG.getNode(SPUISD::SELB, VT, trueval, falseval, compare);
2793 //! Custom lower ISD::TRUNCATE
// Truncation is done with a shufb: promote the source scalar into a vector
// and shuffle the least-significant bytes of the wide value into the
// destination type's preferred slot. maskHigh/maskLow are the two i32 halves
// of the byte-selection pattern, chosen by (source type, dest type).
2794 static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
2796   MVT VT = Op.getValueType();
2797   MVT::SimpleValueType simpleVT = VT.getSimpleVT();
2798   MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2800   SDValue Op0 = Op.getOperand(0);
2801   MVT Op0VT = Op0.getValueType();
2802   MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2804   SDValue PromoteScalar = DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
2809   // Create shuffle mask
2810   switch (Op0VT.getSimpleVT()) {
// Source is i64 (nested switch on destination type, elided labels).
2814       // least significant doubleword of quadword
2815       maskHigh = 0x08090a0b;
2816       maskLow = 0x0c0d0e0f;
2819       // least significant word of quadword
2820       maskHigh = maskLow = 0x0c0d0e0f;
2823       // least significant halfword of quadword
2824       maskHigh = maskLow = 0x0e0f0e0f;
2827       // least significant byte of quadword
2828       maskHigh = maskLow = 0x0f0f0f0f;
2831       cerr << "Truncation to illegal type!";
// Source is i32.
2838       // least significant word of doubleword
2839       maskHigh = maskLow = 0x04050607;
2842       // least significant halfword of doubleword
2843       maskHigh = maskLow = 0x06070607;
2846       // least significant byte of doubleword
2847       maskHigh = maskLow = 0x07070707;
2850       cerr << "Truncation to illegal type!";
// Source is i16.
2858       // least significant halfword of word
2859       maskHigh = maskLow = 0x02030203;
2862       // least significant byte of word/halfword
2863       maskHigh = maskLow = 0x03030303;
2866       cerr << "Truncation to illegal type!";
2871     cerr << "Trying to lower truncation from illegal type!";
2875   // Use a shuffle to perform the truncation
2876   SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2877                                  DAG.getConstant(maskHigh, MVT::i32),
2878                                  DAG.getConstant(maskLow, MVT::i32),
2879                                  DAG.getConstant(maskHigh, MVT::i32),
2880                                  DAG.getConstant(maskLow, MVT::i32));
2882   SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT,
2883                                      PromoteScalar, PromoteScalar, shufMask);
2885   return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2886                      DAG.getNode(ISD::BIT_CONVERT, VecVT, truncShuffle));
2889 //! Custom (target-specific) lowering entry point
2891   This is where LLVM's DAG selection process calls to do target-specific
// Dispatches each custom-lowered opcode to the matching Lower* helper above.
// The default case prints a diagnostic and dumps the offending node.
2895 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2897   unsigned Opc = (unsigned) Op.getOpcode();
2898   MVT VT = Op.getValueType();
2902     cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2903     cerr << "Op.getOpcode() = " << Opc << "\n";
2904     cerr << "*Op.getNode():\n";
2905     Op.getNode()->dump();
2912     return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2914     return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2915   case ISD::ConstantPool:
2916     return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2917   case ISD::GlobalAddress:
2918     return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2919   case ISD::JumpTable:
2920     return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2922     return LowerConstant(Op, DAG);
2923   case ISD::ConstantFP:
2924     return LowerConstantFP(Op, DAG);
2926     return LowerBRCOND(Op, DAG);
2927   case ISD::FORMAL_ARGUMENTS:
2928     return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2930     return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2932     return LowerRET(Op, DAG, getTargetMachine());
2935   // i8, i64 math ops:
2936   case ISD::ZERO_EXTEND:
2937   case ISD::SIGN_EXTEND:
2938   case ISD::ANY_EXTEND:
// Route by result width: i8 ops promote to i16, i64 ops use quadword tricks.
2947       return LowerI8Math(Op, DAG, Opc);
2948     else if (VT == MVT::i64)
2949       return LowerI64Math(Op, DAG, Opc);
2953   // Vector-related lowering.
2954   case ISD::BUILD_VECTOR:
2955     return LowerBUILD_VECTOR(Op, DAG);
2956   case ISD::SCALAR_TO_VECTOR:
2957     return LowerSCALAR_TO_VECTOR(Op, DAG);
2958   case ISD::VECTOR_SHUFFLE:
2959     return LowerVECTOR_SHUFFLE(Op, DAG);
2960   case ISD::EXTRACT_VECTOR_ELT:
2961     return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2962   case ISD::INSERT_VECTOR_ELT:
2963     return LowerINSERT_VECTOR_ELT(Op, DAG);
2965   // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2969     return LowerByteImmed(Op, DAG);
2971   // Vector and i8 multiply:
2974     return LowerVectorMUL(Op, DAG);
2975   else if (VT == MVT::i8)
2976     return LowerI8Math(Op, DAG, Opc);
2978     return LowerMUL(Op, DAG, VT, Opc);
2981     if (VT == MVT::f32 || VT == MVT::v4f32)
2982       return LowerFDIVf32(Op, DAG);
2984     // This is probably a libcall
2985     else if (Op.getValueType() == MVT::f64)
2986       return LowerFDIVf64(Op, DAG);
2989       assert(0 && "Calling FDIV on unsupported MVT");
2992     return LowerCTPOP(Op, DAG);
2994   case ISD::SELECT_CC:
2995     return LowerSELECT_CC(Op, DAG);
2998     return LowerTRUNCATE(Op, DAG);
//! Replace illegal-typed node results (currently a diagnostic-only stub).
// No opcodes are handled yet: unrecognized nodes print a "need to fix this"
// message and are otherwise returned unchanged.
3004 void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
3005                                            SmallVectorImpl<SDValue>&Results,
3009   unsigned Opc = (unsigned) N->getOpcode();
3010   MVT OpVT = N->getValueType(0);
3014     cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
3015     cerr << "Op.getOpcode() = " << Opc << "\n";
3016     cerr << "*Op.getNode():\n";
3024   /* Otherwise, return unchanged */
3027 //===----------------------------------------------------------------------===//
3028 // Target Optimization Hooks
3029 //===----------------------------------------------------------------------===//
// PerformDAGCombine - Cell SPU-specific DAG-combine hook, called by the
// generic DAGCombiner for target (SPUISD) nodes and for generic nodes the
// target opted into.  Builds `Result` (or returns a replacement node
// directly) when a simplification applies; an empty `Result` means "leave
// the node unchanged".
//
// NOTE(review): this excerpt is non-contiguous -- the embedded original line
// numbers (3032, 3035, 3037, ...) jump, so several case labels, statements,
// returns and closing braces are NOT visible here.  Comments below annotate
// only the visible code; confirm the elided lines against the full file.
3032 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
3035 TargetMachine &TM = getTargetMachine();
3037 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
3038 SelectionDAG &DAG = DCI.DAG;
3039 SDValue Op0 = N->getOperand(0); // everything has at least one operand
3040 MVT NodeVT = N->getValueType(0); // The node's value type
3041 MVT Op0VT = Op0.getValueType(); // The first operand's result
3042 SDValue Result; // Initially, empty result
3044 switch (N->getOpcode()) {
// NOTE(review): the case label for this arm (presumably `case ISD::ADD:`)
// falls in an elided line; the two branches below fold a constant into an
// SPUindirect address in either operand order.
3047 SDValue Op1 = N->getOperand(1);
3049 if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) {
3050 SDValue Op01 = Op0.getOperand(1);
3051 if (Op01.getOpcode() == ISD::Constant
3052 || Op01.getOpcode() == ISD::TargetConstant) {
3053 // (add <const>, (SPUindirect <arg>, <const>)) ->
3054 // (SPUindirect <arg>, <const + const>)
3055 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1)
3056 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
3057 SDValue combinedConst =
3058 DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(), Op0VT);
// Hand-rolled DEBUG block (stream statement's head is in an elided line).
3060 #if !defined(NDEBUG)
3061 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
3063 << "Replace: (add " << CN0->getZExtValue() << ", "
3064 << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n"
3065 << "With: (SPUindirect <arg>, "
3066 << CN0->getZExtValue() + CN1->getZExtValue() << ")\n";
// Replace the whole (add ...) with a single indirect address node.
3070 return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
3071 Op0.getOperand(0), combinedConst);
3073 } else if (isa<ConstantSDNode>(Op0)
3074 && Op1.getOpcode() == SPUISD::IndirectAddr) {
3075 SDValue Op11 = Op1.getOperand(1);
3076 if (Op11.getOpcode() == ISD::Constant
3077 || Op11.getOpcode() == ISD::TargetConstant) {
3078 // (add (SPUindirect <arg>, <const>), <const>) ->
3079 // (SPUindirect <arg>, <const + const>)
3080 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
3081 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
3082 SDValue combinedConst =
3083 DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(), Op0VT);
3085 DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
3086 << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
3087 DEBUG(cerr << "With: (SPUindirect <arg>, "
3088 << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
3090 return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
3091 Op1.getOperand(0), combinedConst);
// NOTE(review): closing braces / `break` for the ADD arm are elided here.
3096 case ISD::SIGN_EXTEND:
3097 case ISD::ZERO_EXTEND:
3098 case ISD::ANY_EXTEND: {
// Collapse an extend of a prefslot extract when the types already match --
// the extend is then a no-op.
3099 if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
3100 // (any_extend (SPUextract_elt0 <arg>)) ->
3101 // (SPUextract_elt0 <arg>)
3102 // Types must match, however...
3103 #if !defined(NDEBUG)
3104 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
3105 cerr << "\nReplace: ";
3108 Op0.getNode()->dump(&DAG);
// NOTE(review): the `return Op0;` (or equivalent) for this arm is elided.
3117 case SPUISD::IndirectAddr: {
// Fold a zero-offset indirect over an A-form address (only valid when not
// using large-memory code generation, per the usingLargeMem() guard).
3118 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
3119 ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
3120 if (CN->getZExtValue() == 0) {
3121 // (SPUindirect (SPUaform <addr>, 0), 0) ->
3122 // (SPUaform <addr>, 0)
3124 DEBUG(cerr << "Replace: ");
3125 DEBUG(N->dump(&DAG));
3126 DEBUG(cerr << "\nWith: ");
3127 DEBUG(Op0.getNode()->dump(&DAG));
3128 DEBUG(cerr << "\n");
// NOTE(review): the actual replacement (`return Op0;` or assignment to
// Result) falls in elided lines.
3135 case SPUISD::SHLQUAD_L_BITS:
3136 case SPUISD::SHLQUAD_L_BYTES:
3137 case SPUISD::VEC_SHL:
3138 case SPUISD::VEC_SRL:
3139 case SPUISD::VEC_SRA:
3140 case SPUISD::ROTQUAD_RZ_BYTES:
3141 case SPUISD::ROTQUAD_RZ_BITS: {
3142 SDValue Op1 = N->getOperand(1);
3144 if (isa<ConstantSDNode>(Op1)) {
3145 // Kill degenerate vector shifts:
3146 ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
// A shift/rotate by zero is the identity; the replacement statement for
// that case is in an elided line.
3147 if (CN->getZExtValue() == 0) {
3153 case SPUISD::PROMOTE_SCALAR: {
3154 switch (Op0.getOpcode()) {
3157 case ISD::ANY_EXTEND:
3158 case ISD::ZERO_EXTEND:
3159 case ISD::SIGN_EXTEND: {
3160 // (SPUpromote_scalar (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
3162 // but only if the SPUpromote_scalar and <arg> types match.
3163 SDValue Op00 = Op0.getOperand(0);
3164 if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
3165 SDValue Op000 = Op00.getOperand(0);
// When the promoted type matches the original vector type, the whole
// promote/extend/extract round-trip is redundant (assignment elided).
3166 if (Op000.getValueType() == NodeVT) {
3172 case SPUISD::VEC2PREFSLOT: {
3173 // (SPUpromote_scalar (SPUvec2prefslot <arg>)) ->
// ... the original vector <arg> itself:
3175 Result = Op0.getOperand(0);
3182 // Otherwise, return unchanged.
// Trace any successful combine before handing Result back to the combiner.
3184 if (Result.getNode()) {
3185 DEBUG(cerr << "\nReplace.SPU: ");
3186 DEBUG(N->dump(&DAG));
3187 DEBUG(cerr << "\nWith: ");
3188 DEBUG(Result.getNode()->dump(&DAG));
3189 DEBUG(cerr << "\n");
3196 //===----------------------------------------------------------------------===//
3197 // Inline Assembly Support
3198 //===----------------------------------------------------------------------===//
3200 /// getConstraintType - Given a constraint letter, return the type of
3201 /// constraint it is for this target.
// Single-letter constraints handled here map to C_RegisterClass; anything
// else is deferred to the TargetLowering default.
// NOTE(review): the case labels between the switch and the return (the
// actual letters recognized) fall in elided lines of this excerpt.
3202 SPUTargetLowering::ConstraintType
3203 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
3204 if (ConstraintLetter.size() == 1) {
3205 switch (ConstraintLetter[0]) {
3206 default: break;
3212 return C_RegisterClass;
3215 return TargetLowering::getConstraintType(ConstraintLetter);
// getRegForInlineAsmConstraint - map a single-letter inline-asm constraint
// (plus the operand's value type) to an SPU register class.  Falls back to
// the TargetLowering default for anything unrecognized.
// NOTE(review): the case labels and the `if (VT == ...)` heads that select
// between the returns below are in elided lines; only the chosen register
// classes are visible (R64C/R32C for integers, R32FP/R64FP for f32/f64,
// GPRC as the general case).
3218 std::pair<unsigned, const TargetRegisterClass*>
3219 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
3222 if (Constraint.size() == 1) {
3223 // GCC RS6000 Constraint Letters
3224 switch (Constraint[0]) {
3228 return std::make_pair(0U, SPU::R64CRegisterClass);
3229 return std::make_pair(0U, SPU::R32CRegisterClass);
3232 return std::make_pair(0U, SPU::R32FPRegisterClass);
3233 else if (VT == MVT::f64)
3234 return std::make_pair(0U, SPU::R64FPRegisterClass);
3237 return std::make_pair(0U, SPU::GPRCRegisterClass);
3241 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
3244 //! Compute used/known bits for a SPU operand
// Implements the target hook that reports, for SPUISD nodes, which result
// bits are known zero / known one to the DAG combiner.
// NOTE(review): several lines (parameter list entries, KnownZero/KnownOne
// reset, case bodies and the final closing braces) are elided from this
// excerpt.
3246 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
3250 const SelectionDAG &DAG,
3251 unsigned Depth ) const {
3253 const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
3256 switch (Op.getOpcode()) {
3258 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
3268 case SPUISD::PROMOTE_SCALAR: {
// Bits outside the promoted-from type are known zero.
// NOTE(review): OR-ing InMask into KnownOne below claims every in-type bit
// is known-one, which looks wrong (it should describe only provably-set
// bits) -- verify against the upstream history of this hook.
3269 SDValue Op0 = Op.getOperand(0);
3270 MVT Op0VT = Op0.getValueType();
3271 unsigned Op0VTBits = Op0VT.getSizeInBits();
3272 uint64_t InMask = Op0VT.getIntegerVTBitMask();
3273 KnownZero |= APInt(Op0VTBits, ~InMask, false);
3274 KnownOne |= APInt(Op0VTBits, InMask, false);
3278 case SPUISD::LDRESULT:
3279 case SPUISD::VEC2PREFSLOT: {
// Same mask-based pattern as PROMOTE_SCALAR, keyed on the node's own type;
// the same known-one concern from the NOTE above applies here.
3280 MVT OpVT = Op.getValueType();
3281 unsigned OpVTBits = OpVT.getSizeInBits();
3282 uint64_t InMask = OpVT.getIntegerVTBitMask();
3283 KnownZero |= APInt(OpVTBits, ~InMask, false);
3284 KnownOne |= APInt(OpVTBits, InMask, false);
// Remaining SPU nodes: nothing is reported about their bits (the case
// bodies, if any, are elided; they appear to fall through to the end).
3293 case SPUISD::SHLQUAD_L_BITS:
3294 case SPUISD::SHLQUAD_L_BYTES:
3295 case SPUISD::VEC_SHL:
3296 case SPUISD::VEC_SRL:
3297 case SPUISD::VEC_SRA:
3298 case SPUISD::VEC_ROTL:
3299 case SPUISD::VEC_ROTR:
3300 case SPUISD::ROTQUAD_RZ_BYTES:
3301 case SPUISD::ROTQUAD_RZ_BITS:
3302 case SPUISD::ROTBYTES_LEFT:
3303 case SPUISD::SELECT_MASK:
3305 case SPUISD::FPInterp:
3306 case SPUISD::FPRecipEst:
3307 case SPUISD::SEXT32TO64:
3312 // LowerAsmOperandForConstraint
// Lowers an inline-asm operand for a given constraint letter.  This target
// currently just delegates to the base-class implementation.
// NOTE(review): the `bool hasMemory` parameter line and the trailing
// arguments of the delegated call are elided from this excerpt.
3314 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3315 char ConstraintLetter,
3317 std::vector<SDValue> &Ops,
3318 SelectionDAG &DAG) const {
3319 // Default, for the time being, to the base class handler
3320 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
3324 /// isLegalAddressImmediate - Return true if the integer value can be used
3325 /// as the offset of the target addressing mode.
// The Type of the access is ignored; only the 18-bit signed offset range of
// SPU d-form addressing matters here.
// NOTE(review): both bounds are exclusive, so the accepted range is
// [-(2^18)+1, (2^18)-3] -- the upper bound `V < (1 << 18) - 1` excludes
// (2^18)-2's successor asymmetrically; confirm this off-by-one is intended.
3326 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3327 const Type *Ty) const {
3328 // SPU's addresses are 256K:
3329 return (V > -(1 << 18) && V < (1 << 18) - 1);
// Overload for GlobalValue addresses.  The body is elided from this
// excerpt; only the signature is visible.
3332 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3337 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3338 // The SPU target isn't yet aware of offsets.