//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//
#include "SPURegisterNames.h"
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "SPUFrameInfo.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"

#include <map>
using namespace llvm;

// Used in getTargetNodeName() below
namespace {
  std::map<unsigned, const char *> node_names;

  //! MVT mapping to useful data for Cell SPU
  struct valtype_map_s {
    const MVT valtype;
    const int prefslot_byte;
  };

  const valtype_map_s valtype_map[] = {
    { MVT::i1,   3 },
    { MVT::i8,   3 },
    { MVT::i16,  2 },
    { MVT::i32,  0 },
    { MVT::f32,  0 },
    { MVT::i64,  0 },
    { MVT::f64,  0 },
    { MVT::i128, 0 }
  };

  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
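  // Illustrative note (added; not in the original source): the preferred
  // slot is the leftmost 32-bit word of a 128-bit SPU register, so a scalar
  // of byte width w < 4 sits right-justified in that word at byte offset
  // 4 - w. For example, getValueTypeMapEntry(MVT::i16)->prefslot_byte == 2,
  // meaning an i16 occupies bytes 2-3 of the quadword's first word.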
  const valtype_map_s *getValueTypeMapEntry(MVT VT) {
    const valtype_map_s *retval = 0;

    for (size_t i = 0; i < n_valtype_map; ++i) {
      if (valtype_map[i].valtype == VT) {
        retval = valtype_map + i;
        break;
      }
    }

#ifndef NDEBUG
    if (retval == 0) {
      cerr << "getValueTypeMapEntry returns NULL for "
           << VT.getMVTString()
           << "\n";
      abort();
    }
#endif

    return retval;
  }
  //! Predicate that returns true if operand is a memory target
  /*!
    \arg Op Operand to test
    \return true if the operand is a memory target (i.e., global
    address, external symbol, constant pool) or an A-form
    address.
   */
  bool isMemoryOperand(const SDValue &Op)
  {
    const unsigned Opc = Op.getOpcode();
    return (Opc == ISD::GlobalAddress
            || Opc == ISD::GlobalTLSAddress
            || Opc == ISD::JumpTable
            || Opc == ISD::ConstantPool
            || Opc == ISD::ExternalSymbol
            || Opc == ISD::TargetGlobalAddress
            || Opc == ISD::TargetGlobalTLSAddress
            || Opc == ISD::TargetJumpTable
            || Opc == ISD::TargetConstantPool
            || Opc == ISD::TargetExternalSymbol
            || Opc == SPUISD::AFormAddr);
  }
  //! Predicate that returns true if the operand is an indirect target
  bool isIndirectOperand(const SDValue &Op)
  {
    const unsigned Opc = Op.getOpcode();
    return (Opc == ISD::Register
            || Opc == SPUISD::LDRESULT);
  }
}

SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM),
    SPUTM(TM)
{
  // Fold away setcc operations if possible.
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);
  // Set up the SPU's register classes:
  addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign- or zero-extended loads for i1, i8, i16:
  setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);

  setLoadExtAction(ISD::EXTLOAD,  MVT::i8, Custom);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);

  setTruncStoreAction(MVT::i8,   MVT::i8, Custom);
  setTruncStoreAction(MVT::i16,  MVT::i8, Custom);
  setTruncStoreAction(MVT::i32,  MVT::i8, Custom);
  setTruncStoreAction(MVT::i64,  MVT::i8, Custom);
  setTruncStoreAction(MVT::i128, MVT::i8, Custom);

  setLoadExtAction(ISD::EXTLOAD,  MVT::i16, Custom);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);

  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Custom);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::Constant,   MVT::i64, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::LOAD,  VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);
  }
  // Custom lower BRCOND for i8 to "promote" the result to i16
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);

  // Custom lower SELECT_CC for most cases, but expand by default
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i8,    Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i16,   Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32,   Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i64,   Custom);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);

  // SPU has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // We don't support sin/cos/fmod
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);

  // SPU has no hardware square root; expand it
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.

  // FIXME: Change from "expand" to appropriate type once ROTR is supported in
  //        the .td files.
  setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i8,  Expand /*Custom*/);

  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8,  Custom);

  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);

  // SPU needs custom lowering for shift left/right for i64
  setOperationAction(ISD::SHL, MVT::i64, Custom);
  setOperationAction(ISD::SRL, MVT::i64, Custom);
  setOperationAction(ISD::SRA, MVT::i64, Custom);

  // Custom lower i8, i32 and i64 multiplications
  setOperationAction(ISD::MUL, MVT::i8,  Custom);
  setOperationAction(ISD::MUL, MVT::i32, Custom);
  setOperationAction(ISD::MUL, MVT::i64, Expand);   // libcall

  // SMUL_LOHI, UMUL_LOHI
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);

  // Need to custom handle (some) common i8, i64 math ops
  setOperationAction(ISD::ADD, MVT::i64, Custom);
  setOperationAction(ISD::SUB, MVT::i8,  Custom);
  setOperationAction(ISD::SUB, MVT::i64, Custom);

  // SPU does not have BSWAP. It does support CTLZ for i32;
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8,  Custom);
  setOperationAction(ISD::CTPOP, MVT::i16, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i64, Custom);

  setOperationAction(ISD::CTTZ, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i64, Expand);

  setOperationAction(ISD::CTLZ, MVT::i32, Legal);

  // SPU has a version of select that implements (a&~c)|(b&c), just like
  // select ought to work:
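  // (Illustrative, added note: per the SPU ISA, selb rt, ra, rb, rc computes,
  //  bit for bit, rt = (ra & ~rc) | (rb & rc), so a select whose condition is
  //  an all-ones-or-all-zeros mask maps onto selb directly.)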
  setOperationAction(ISD::SELECT, MVT::i8,  Legal);
  setOperationAction(ISD::SELECT, MVT::i16, Legal);
  setOperationAction(ISD::SELECT, MVT::i32, Legal);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);

  setOperationAction(ISD::SETCC, MVT::i8,  Legal);
  setOperationAction(ISD::SETCC, MVT::i16, Legal);
  setOperationAction(ISD::SETCC, MVT::i32, Legal);
  setOperationAction(ISD::SETCC, MVT::i64, Expand);

  // Zero extension and sign extension for i64 have to be
  // custom legalized
  setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
  setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
  setOperationAction(ISD::ANY_EXTEND,  MVT::i64, Custom);

  // Custom lower truncates
  setOperationAction(ISD::TRUNCATE, MVT::i8,  Custom);
  setOperationAction(ISD::TRUNCATE, MVT::i16, Custom);
  setOperationAction(ISD::TRUNCATE, MVT::i32, Custom);
  setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);

  // SPU has a legal FP -> signed INT instruction
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f32, Custom);
  //setOperationAction(ISD::FDIV, MVT::f64, Custom);

  // SPU has [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
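  // (Illustrative, added note: the shift expansion turns sext_inreg(x, i1)
  //  on an i32 value into (x << 31) >> 31, i.e. a shl followed by an
  //  arithmetic sra that smears bit 0 across the whole word.)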
  // We don't emit label-based line-number (debug) information; expand it
  setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC,     MVT::Other, Expand);
  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::ConstantPool,  VT, Custom);
    setOperationAction(ISD::JumpTable,     VT, Custom);
  }

  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG,  MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND,  MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE,    MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
  for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT VT = (MVT::SimpleValueType)i;

    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD, VT, Legal);
    setOperationAction(ISD::SUB, VT, Legal);
    // mul has to be custom lowered.
    setOperationAction(ISD::MUL, VT, Custom);

    setOperationAction(ISD::AND,    VT, Legal);
    setOperationAction(ISD::OR,     VT, Legal);
    setOperationAction(ISD::XOR,    VT, Legal);
    setOperationAction(ISD::LOAD,   VT, Legal);
    setOperationAction(ISD::SELECT, VT, Legal);
    setOperationAction(ISD::STORE,  VT, Legal);

    // These operations need to be expanded (FDIV is custom lowered):
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);
    setOperationAction(ISD::FDIV, VT, Custom);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
  }

  setOperationAction(ISD::MUL, MVT::v16i8, Custom);
  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR,  MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
  setShiftAmountType(MVT::i32);
  setBooleanContents(ZeroOrOneBooleanContent);

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  computeRegisterProperties();

  // Set pre-RA register scheduler default to BURR, which produces slightly
  // better code than the default (could also be TDRR, but TargetLowering.h
  // needs a mod to support that model):
  setSchedulingPreference(SchedulingForRegPressure);
}

const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
    node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
    node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
    node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
    node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
    node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
      "SPUISD::ROTQUAD_RZ_BYTES";
    node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
      "SPUISD::ROTQUAD_RZ_BITS";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
      "SPUISD::ROTBYTES_LEFT_BITS";
    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
    node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
    node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
    node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
    node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
    node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
    node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
}

MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const {
  MVT VT = Op.getValueType();
  return (VT.isInteger() ? VT : MVT(MVT::i32));
}

//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
//  LowerOperation implementation
//===----------------------------------------------------------------------===//
/// Aligned load common code for CellSPU
/*!
  \param[in] Op The SelectionDAG load or store operand
  \param[in] DAG The selection DAG
  \param[in] ST CellSPU subtarget information structure
  \param[in,out] alignment Caller initializes this to the load or store node's
  value from getAlignment(); may be updated while generating the aligned load
  \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
  offset (rounded down to a multiple of 16)
  \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
  byte offset of the preferred slot within the 16-byte chunk
  \param[in,out] VT Caller initializes this value type to the load or store
  node's loaded or stored value type; may be updated if an extending load or
  store is performed
  \param[out] was16aligned true if the base pointer had 16-byte alignment,
  otherwise false. Can help to determine if the chunk needs to be rotated.

 Both load and store lowering load a block of data aligned on a 16-byte
 boundary. This is the common aligned load code shared between both.
 */
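/*
 Illustrative example (added; addresses hypothetical): for an i32 load from
 base+0x1a, alignOffs = 0x1a, so the enclosing quadword starts at
 alignOffs & ~0xf = 0x10, and prefSlotOffs = (0x1a & 0xf) - prefslot_byte
 = 10 - 0 = 10; the loaded chunk must therefore be rotated left by 10 bytes
 to land the i32 in the preferred slot.
 */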
static SDValue
AlignedLoad(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST,
            LSBaseSDNode *LSN,
            unsigned &alignment, int &alignOffs, int &prefSlotOffs,
            MVT &VT, bool &was16aligned)
{
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  const valtype_map_s *vtm = getValueTypeMapEntry(VT);
  SDValue basePtr = LSN->getBasePtr();
  SDValue chain = LSN->getChain();

  if (basePtr.getOpcode() == ISD::ADD) {
    SDValue Op1 = basePtr.getNode()->getOperand(1);

    if (Op1.getOpcode() == ISD::Constant
        || Op1.getOpcode() == ISD::TargetConstant) {
      const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));

      alignOffs = (int) CN->getZExtValue();
      prefSlotOffs = (int) (alignOffs & 0xf);

      // Adjust the rotation amount to ensure that the final result ends up in
      // the preferred slot:
      prefSlotOffs -= vtm->prefslot_byte;
      basePtr = basePtr.getOperand(0);

      // Loading from memory, can we adjust alignment?
      if (basePtr.getOpcode() == SPUISD::AFormAddr) {
        SDValue APtr = basePtr.getOperand(0);
        if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
          GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
          alignment = GSDN->getGlobal()->getAlignment();
        }
      }
    } else {
      alignOffs = 0;
      prefSlotOffs = -vtm->prefslot_byte;
    }
  } else if (basePtr.getOpcode() == ISD::FrameIndex) {
    FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(basePtr);
    alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
    prefSlotOffs = (int) (alignOffs & 0xf);
    prefSlotOffs -= vtm->prefslot_byte;
  } else {
    alignOffs = 0;
    prefSlotOffs = -vtm->prefslot_byte;
  }

  if (alignment == 16) {
    // Realign the base pointer as a D-Form address:
    if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
      basePtr = DAG.getNode(ISD::ADD, PtrVT,
                            basePtr,
                            DAG.getConstant((alignOffs & ~0xf), PtrVT));
    }

    // Emit the vector load:
    was16aligned = true;
    return DAG.getLoad(MVT::v16i8, chain, basePtr,
                       LSN->getSrcValue(), LSN->getSrcValueOffset(),
                       LSN->isVolatile(), 16);
  }
  // Unaligned load or we're using the "large memory" model, which means that
  // we have to be very pessimistic:
  if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
    basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr,
                          DAG.getConstant(0, PtrVT));
  }

  // Add the offset
  basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
                        DAG.getConstant((alignOffs & ~0xf), PtrVT));
  was16aligned = false;
  return DAG.getLoad(MVT::v16i8, chain, basePtr,
                     LSN->getSrcValue(), LSN->getSrcValueOffset(),
                     LSN->isVolatile(), 16);
}

/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to rotate to extract the requested element.

 For extending loads, we also want to ensure that the following sequence is
 emitted, e.g. for MVT::f32 extending load to MVT::f64:

\verbatim
%1  v16i8,ch = load
%2  v16i8,ch = rotate %1
%3  v4f32,ch = bitconvert %2
%4  f32      = vec2prefslot %3
%5  f64      = fp_extend %4
\endverbatim
 */
static SDValue
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDValue the_chain = LN->getChain();
  MVT InVT = LN->getMemoryVT();
  MVT OutVT = Op.getValueType();
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    int offset, rotamt;
    bool was16aligned;
    SDValue result =
      AlignedLoad(Op, DAG, ST, LN, alignment, offset, rotamt, InVT,
                  was16aligned);

    if (result.getNode() == 0)
      return result;

    the_chain = result.getValue(1);
    // Rotate the chunk if necessary
    if (rotamt != 0 || !was16aligned) {
      SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
      SDValue Ops[2];

      Ops[0] = result;
      if (was16aligned) {
        Ops[1] = DAG.getConstant(rotamt, MVT::i16);
      } else {
        MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
        LoadSDNode *LN1 = cast<LoadSDNode>(result);
        Ops[1] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
                             DAG.getConstant(rotamt, PtrVT));
      }

      result = DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v16i8, Ops, 2);
    }

    // Convert the loaded v16i8 vector to the appropriate vector type
    // specified by the operand:
    MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
    result = DAG.getNode(SPUISD::VEC2PREFSLOT, InVT,
                         DAG.getNode(ISD::BIT_CONVERT, vecVT, result));

    // Handle extending loads by extending the scalar result:
    if (ExtType == ISD::SEXTLOAD) {
      result = DAG.getNode(ISD::SIGN_EXTEND, OutVT, result);
    } else if (ExtType == ISD::ZEXTLOAD) {
      result = DAG.getNode(ISD::ZERO_EXTEND, OutVT, result);
    } else if (ExtType == ISD::EXTLOAD) {
      unsigned NewOpc = ISD::ANY_EXTEND;

      if (OutVT.isFloatingPoint())
        NewOpc = ISD::FP_EXTEND;

      result = DAG.getNode(NewOpc, OutVT, result);
    }

    SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
    SDValue retops[2] = {
      result,
      the_chain
    };

    result = DAG.getNode(SPUISD::LDRESULT, retvts,
                         retops, sizeof(retops) / sizeof(retops[0]));
    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) LN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}

/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
 */
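/*
 Illustrative sequence (added; register numbers hypothetical): storing an i32
 %val at base+4 becomes, roughly,

   %1 v16i8,ch = load <base & ~0xf>            ; fetch the enclosing quadword
   %2 v4i32    = shuffle_mask <insert offset>  ; C*D-style insertion control word
   %3 v4i32    = scalar_to_vector %val
   %4 v16i8    = shufb %3, %1, %2              ; splice %val into the chunk
   ch          = store %4, <base & ~0xf>
 */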
static SDValue
LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDValue Value = SN->getValue();
  MVT VT = Value.getValueType();
  MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  unsigned alignment = SN->getAlignment();

  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    int chunk_offset, slot_offset;
    bool was16aligned;

    // The vector type we really want to load from the 16-byte chunk.
    MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
        stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));

    SDValue alignLoadVec =
      AlignedLoad(Op, DAG, ST, SN, alignment,
                  chunk_offset, slot_offset, VT, was16aligned);

    if (alignLoadVec.getNode() == 0)
      return alignLoadVec;

    LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
    SDValue basePtr = LN->getBasePtr();
    SDValue the_chain = alignLoadVec.getValue(1);
    SDValue theValue = SN->getValue();
    SDValue result;

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }

    SDValue insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
    SDValue insertEltPtr;

    // If the base pointer is already a D-form address, then just create
    // a new D-form address with a slot offset and the original base pointer.
    // Otherwise generate a D-form address with the slot offset relative
    // to the stack pointer, which is always aligned.
    DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
    DEBUG(basePtr.getNode()->dump(&DAG));
    DEBUG(cerr << "\n");

    if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
        (basePtr.getOpcode() == ISD::ADD
         && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
      insertEltPtr = basePtr;
    } else {
      insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
    }

    SDValue insertEltOp =
      DAG.getNode(SPUISD::SHUFFLE_MASK, vecVT, insertEltPtr);
    SDValue vectorizeOp =
      DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue);

    result = DAG.getNode(SPUISD::SHUFB, vecVT,
                         vectorizeOp, alignLoadVec,
                         DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, insertEltOp));

    result = DAG.getStore(the_chain, result, basePtr,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->getAlignment());

#if 0 && !defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      const SDValue &currentRoot = DAG.getRoot();

      DAG.setRoot(result);
      cerr << "------- CellSPU:LowerStore result:\n";
      DAG.dump();
      cerr << "-------\n";
      DAG.setRoot(currentRoot);
    }
#endif

    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) SN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}

/// Generate the address of a constant pool entry.
static SDValue
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDValue with the constant pool address in it.
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
    }
  }

  assert(0 &&
         "LowerConstantPool: Relocation model other than static"
         " not supported.");
  return SDValue();
}

static SDValue
LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
    }
  }

  assert(0 &&
         "LowerJumpTable: Relocation model other than static not supported.");
  return SDValue();
}

static SDValue
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  const TargetMachine &TM = DAG.getTarget();
  SDValue Zero = DAG.getConstant(0, PtrVT);

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
    }
  } else {
    cerr << "LowerGlobalAddress: Relocation model other than static not "
         << "supported.\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}

//! Custom lower i64 integer constants
/*!
 This code inserts all of the necessary juggling that needs to occur to load
 a 64-bit constant into a register.
 */
static SDValue
LowerConstant(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();

  if (VT == MVT::i64) {
    ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
    SDValue T = DAG.getConstant(CN->getZExtValue(), VT);
    return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
  } else {
    cerr << "LowerConstant: unhandled constant type "
         << VT.getMVTString()
         << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}

//! Custom lower double precision floating point constants
static SDValue
LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();

  if (VT == MVT::f64) {
    ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());

    assert((FP != 0) &&
           "LowerConstantFP: Node is not ConstantFPSDNode");

    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
    SDValue T = DAG.getConstant(dbits, MVT::i64);
    SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T);
    return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
                       DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, Tvec));
  }

  return SDValue();
}

//! Lower MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
static SDValue
LowerBRCOND(SDValue Op, SelectionDAG &DAG)
{
  SDValue Cond = Op.getOperand(1);
  MVT CondVT = Cond.getValueType();
  MVT CondNVT;

  if (CondVT == MVT::i8) {
    CondNVT = MVT::i16;
    return DAG.getNode(ISD::BRCOND, Op.getValueType(),
                       Op.getOperand(0),
                       DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
                       Op.getOperand(2));
  } else
    return SDValue();                   // Unchanged
}

static SDValue
LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
{
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  SmallVector<SDValue, 48> ArgValues;
  SDValue Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;

  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Add DAG nodes to load the arguments or copy them out of registers.
  for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
       ArgNo != e; ++ArgNo) {
    MVT ObjectVT = Op.getValue(ArgNo).getValueType();
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    SDValue ArgVal;

    if (ArgRegIdx < NumArgRegs) {
      const TargetRegisterClass *ArgRegClass;

      switch (ObjectVT.getSimpleVT()) {
      default: {
        cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
             << ObjectVT.getMVTString()
             << "\n";
        abort();
      }
      case MVT::i8:
        ArgRegClass = &SPU::R8CRegClass;
        break;
      case MVT::i16:
        ArgRegClass = &SPU::R16CRegClass;
        break;
      case MVT::i32:
        ArgRegClass = &SPU::R32CRegClass;
        break;
      case MVT::i64:
        ArgRegClass = &SPU::R64CRegClass;
        break;
      case MVT::f32:
        ArgRegClass = &SPU::R32FPRegClass;
        break;
      case MVT::f64:
        ArgRegClass = &SPU::R64FPRegClass;
        break;
      case MVT::v2f64:
      case MVT::v4f32:
      case MVT::v2i64:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        ArgRegClass = &SPU::VECREGRegClass;
        break;
      }

      unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
      RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
      ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
      ++ArgRegIdx;
    } else {
      // We need to load the argument to a virtual register if we determined
      // above that we ran out of physical registers of the appropriate type
      // or we're forced to do vararg
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
      ArgOffset += StackSlotSize;
    }

    ArgValues.push_back(ArgVal);
    // Update the chain
    Root = ArgVal.getOperand(0);
  }

  // vararg handling:
  if (isVarArg) {
    // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
    // We will spill (79-3)+1 registers to the stack
    SmallVector<SDValue, 79-3+1> MemOps;

    // Create the frame slot
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
      SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
      SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
      SDValue Store = DAG.getStore(Root, ArgVal, FIN, NULL, 0);
      Root = Store.getOperand(0);
      MemOps.push_back(Store);

      // Increment address by stack slot size for the next stored argument
      ArgOffset += StackSlotSize;
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
                         &MemOps[0], MemOps.size());
  }

  ArgValues.push_back(Root);

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, Op.getNode()->getVTList(),
                     &ArgValues[0], ArgValues.size());
}

/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||    // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;               // Top 14 bits have to be sext of immediate.

  return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
}

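// (Illustrative, added example: Addr = 0x1000 passes both checks -- it is
//  word aligned and survives the sign-extend-from-18-bits round trip -- so
//  it yields the immediate 0x400; Addr = 0x40000 fails the second check.)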
static SDValue
LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
  SDValue Chain = TheCall->getChain();
  SDValue Callee = TheCall->getCallee();
  unsigned NumOps = TheCall->getNumArgs();
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  // Handy pointer type
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area. According to the SPU ABI,
  // we minimally need space for [LR] and [SP]
  unsigned NumStackBytes = SPUFrameInfo::minStackSize();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDValue, 8> MemOpChains;

  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = TheCall->getArg(i);

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType().getSimpleVT()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::v2f64:
    case MVT::v2i64:
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }

  // Update number of stack bytes actually used, insert a call sequence start
  NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
                                                            true));

  if (!MemOpChains.empty()) {
    // Adjust the stack pointer for the stack arguments.
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    GlobalValue *GV = G->getGlobal();
    MVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);

    if (!ST->usingLargeMem()) {
      // Turn calls to targets that are defined (i.e., have bodies) into BRSL
      // style calls, otherwise, external symbols are BRASL calls. This assumes
      // that declared/defined symbols are in the same compilation unit and can
      // be reached through PC-relative jumps.
      //
      // NOTE:
      // This may be an unsafe assumption for JIT and really large compilation
      // units.
      if (GV->isDeclaration()) {
        Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
      } else {
        Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
      }
    } else {
      // "Large memory" mode: Turn all calls into indirect calls with a X-form
      // address.
      Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
  else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDValue(Dest, 0);
  }
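  // (Illustrative, added note: in small-memory mode a callee defined in this
  //  module therefore goes through PCRelAddr and selects to a PC-relative
  //  brsl, while a merely declared callee goes through AFormAddr and selects
  //  to an absolute brasl.)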
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);
  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag),
                      &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  if (TheCall->getValueType(0) != MVT::Other)
    InFlag = Chain.getValue(1);

  SDValue ResultVals[3];
  unsigned NumResults = 0;

  // If the call has results, copy the values out of the ret val registers.
  switch (TheCall->getValueType(0).getSimpleVT()) {
  default: assert(0 && "Unexpected ret value!");
  case MVT::Other: break;
  case MVT::i32:
    if (TheCall->getValueType(1) == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals[1] = Chain.getValue(0);
      NumResults = 2;
    } else {
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      NumResults = 1;
    }
    break;
  case MVT::i64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  case MVT::f32:
  case MVT::f64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  case MVT::v2f64:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  }

  // If the function returns void, just return the chain.
  if (NumResults == 0)
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  ResultVals[NumResults++] = Chain;
  SDValue Res = DAG.getMergeValues(ResultVals, NumResults);
  return Res.getValue(Op.getResNo());
}

static SDValue
LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, TM, RVLocs);
  CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }

  SDValue Chain = Op.getOperand(0);
  SDValue Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
    Flag = Chain.getValue(1);
  }

  if (Flag.getNode())
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
}

//===----------------------------------------------------------------------===//
// Vector related lowering:
//===----------------------------------------------------------------------===//

static ConstantSDNode *
getVecImm(SDNode *N) {
  SDValue OpVal(0, 0);

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.getNode() == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return 0;
  }

  if (OpVal.getNode() != 0) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      return CN;
    }
  }

  return 0; // All UNDEF: use implicit def.; not Constant node
}

/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant.
SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (Value <= 0x3ffff)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}

/// get_vec_i16imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant.
SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
      return DAG.getTargetConstant(Value, ValueType);
    }
  }

  return SDValue();
}

/// get_vec_i10imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 10-bit constant, and if so, return the
/// constant.
SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (isS10Constant(Value))
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}

/// get_vec_i8imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 8-bit constant, and if so, return the
/// constant.
///
/// @note: The incoming vector is v16i8 because that's the only way we can load
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same value.
SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
                           MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getZExtValue();
    if (ValueType == MVT::i16
        && Value <= 0xffff                 /* truncated from uint64_t */
        && ((short) Value >> 8) == ((short) Value & 0xff))
      return DAG.getTargetConstant(Value & 0xff, ValueType);
    else if (ValueType == MVT::i8
             && (Value & 0xff) == Value)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}

/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant.
SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
                             MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if ((ValueType == MVT::i32
         && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
      return DAG.getTargetConstant(Value >> 16, ValueType);
  }

  return SDValue();
}

/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
  }

  return SDValue();
}

/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getTargetConstant(CN->getZExtValue(), MVT::i64);
  }

  return SDValue();
}

// If this is a vector of constants or undefs, get the bits. A bit in
// UndefBits is set if the corresponding element of the vector is an
// ISD::UNDEF value. For undefs, the corresponding VectorBits values are
// zero. Return true if this is not an array of constants, false if it is.
static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
                                       uint64_t UndefBits[2]) {
  // Start with zero'd results.
  VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;

  unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
    SDValue OpVal = BV->getOperand(i);

    unsigned PartNo = i >= e/2;              // In the upper 64 bits?
    unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.

    uint64_t EltBits = 0;
    if (OpVal.getOpcode() == ISD::UNDEF) {
      uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
      UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
      continue;
    } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
    } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
      const APFloat &apf = CN->getValueAPF();
      EltBits = (CN->getValueType(0) == MVT::f32
                 ? FloatToBits(apf.convertToFloat())
                 : DoubleToBits(apf.convertToDouble()));
    } else {
      // Nonconstant element.
      return true;
    }

    VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
  }

  //printf("%llx %llx %llx %llx\n",
  //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
  return false;
}

/// If this is a splat (repetition) of a value across the whole vector, return
/// the smallest size that splats it. For example, "0x01010101010101..." is a
/// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
/// SplatSize = 1 byte.
static bool isConstantSplat(const uint64_t Bits128[2],
                            const uint64_t Undef128[2],
                            int MinSplatBits,
                            uint64_t &SplatBits, uint64_t &SplatUndef,
                            int &SplatSize) {
  // Don't let undefs prevent splats from matching. See if the top 64-bits are
  // the same as the lower 64-bits, ignoring undefs.
  uint64_t Bits64  = Bits128[0] | Bits128[1];
  uint64_t Undef64 = Undef128[0] & Undef128[1];
  uint32_t Bits32  = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
  uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
  uint16_t Bits16  = uint16_t(Bits32)  | uint16_t(Bits32 >> 16);
  uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);

  if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
    if (MinSplatBits < 64) {
      // Check that the top 32-bits are the same as the lower 32-bits, ignoring
      // undefs.
      if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
        if (MinSplatBits < 32) {
          // If the top 16-bits are different from the lower 16-bits, ignoring
          // undefs, we have an i32 splat.
          if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
            if (MinSplatBits < 16) {
              // If the top 8-bits are different from the lower 8-bits, ignoring
              // undefs, we have an i16 splat.
              if ((Bits16 & (uint16_t(~Undef16) >> 8))
                  == ((Bits16 >> 8) & ~Undef16)) {
                // Otherwise, we have an 8-bit splat.
                SplatBits  = uint8_t(Bits16)  | uint8_t(Bits16 >> 8);
                SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
                SplatSize = 1;
                return true;
              }
            } else {
              SplatBits = Bits16;
              SplatUndef = Undef16;
              SplatSize = 2;
              return true;
            }
          }
        } else {
          SplatBits = Bits32;
          SplatUndef = Undef32;
          SplatSize = 4;
          return true;
        }
      }
    } else {
      SplatBits = Bits128[0];
      SplatUndef = Undef128[0];
      SplatSize = 8;
      return true;
    }
  }

  return false;  // Can't be a splat if two pieces don't match.
}

// If this is a case we can't handle, return null and let the default
// expansion code take care of it. If we CAN select this case, and if it
// selects to a single instruction, return Op. Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  // If this is a vector of constants or undefs, get the bits. A bit in
  // UndefBits is set if the corresponding element of the vector is an
  // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
  // zero.
  uint64_t VectorBits[2];
  uint64_t UndefBits[2];
  uint64_t SplatBits, SplatUndef;
  int SplatSize;

  if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
      || !isConstantSplat(VectorBits, UndefBits,
                          VT.getVectorElementType().getSizeInBits(),
                          SplatBits, SplatUndef, SplatSize))
    return SDValue();   // Not a constant vector, not a splat.

  switch (VT.getSimpleVT()) {
  default:
  case MVT::v4f32: {
    uint32_t Value32 = SplatBits;
    assert(SplatSize == 4
           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(Value32, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
  }
  case MVT::v2f64: {
    uint64_t f64val = SplatBits;
    assert(SplatSize == 8
           && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(f64val, MVT::i64);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
  }
  case MVT::v16i8: {
    // 8-bit constants have to be expanded to 16-bits
    unsigned short Value16 = SplatBits | (SplatBits << 8);
    SDValue Ops[8];
    for (int i = 0; i < 8; ++i)
      Ops[i] = DAG.getConstant(Value16, MVT::i16);
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
  }
  case MVT::v8i16: {
    unsigned short Value16;
    if (SplatSize == 2)
      Value16 = (unsigned short) (SplatBits & 0xffff);
    else
      Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
    SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
    SDValue Ops[8];
    for (int i = 0; i < 8; ++i) Ops[i] = T;
    return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
  }
  case MVT::v4i32: {
    unsigned int Value = SplatBits;
    SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
    return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
  }
  case MVT::v2i64: {
    uint64_t val = SplatBits;
    uint32_t upper = uint32_t(val >> 32);
    uint32_t lower = uint32_t(val);

    if (upper == lower) {
      // Magic constant that can be matched by IL, ILA, et. al.
      SDValue Val = DAG.getTargetConstant(val, MVT::i64);
      return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
    } else {
      SDValue LO32;
      SDValue HI32;
      SmallVector<SDValue, 16> ShufBytes;
      bool upper_special, lower_special;

      // NOTE: This code creates common-case shuffle masks that can be easily
      // detected as common expressions. It is not attempting to create highly
      // specialized masks to replace any and all 0's, 0xff's and 0x80's.
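      // (Illustrative, added note, per the SPU shufb definition: a control
      //  byte of the form 10xxxxxx selects 0x00, 110xxxxx selects 0xff, and
      //  111xxxxx selects 0x80 -- hence the 0x80, 0xc0 and 0xe0 values used
      //  below for the special all-zero/all-ones/0x80000000 word patterns.)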
      // Detect if the upper or lower half is a special shuffle mask pattern:
      upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
      lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);

      // Create lower vector if not a special pattern
      if (!lower_special) {
        SDValue LO32C = DAG.getConstant(lower, MVT::i32);
        LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
                           DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                       LO32C, LO32C, LO32C, LO32C));
      }

      // Create upper vector if not a special pattern
      if (!upper_special) {
        SDValue HI32C = DAG.getConstant(upper, MVT::i32);
        HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
                           DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                       HI32C, HI32C, HI32C, HI32C));
      }

      // If either upper or lower are special, then the two input operands are
      // the same (basically, one of them is a "don't care")
      if (lower_special)
        LO32 = HI32;
      if (upper_special)
        HI32 = LO32;
      if (lower_special && upper_special) {
        // Unhappy situation... both upper and lower are special, so punt with
        // a target constant:
        SDValue Zero = DAG.getConstant(0, MVT::i32);
        HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
                                  Zero, Zero);
      }

      for (int i = 0; i < 4; ++i) {
        uint64_t val = 0;
        for (int j = 0; j < 4; ++j) {
          bool process_upper, process_lower;
          val <<= 8;
          process_upper = (upper_special && (i & 1) == 0);
          process_lower = (lower_special && (i & 1) == 1);

          if (process_upper || process_lower) {
            if ((process_upper && upper == 0)
                || (process_lower && lower == 0))
              val |= 0x80;
            else if ((process_upper && upper == 0xffffffff)
                     || (process_lower && lower == 0xffffffff))
              val |= 0xc0;
            else if ((process_upper && upper == 0x80000000)
                     || (process_lower && lower == 0x80000000))
              val |= (j == 0 ? 0xe0 : 0x80);
          } else
            val |= i * 4 + j + ((i & 1) * 16);
        }

        ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
      }

      return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
                         DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                     &ShufBytes[0], ShufBytes.size()));
    }
  }
  }

  return SDValue();
}

1730 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1731 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1732 /// permutation vector, V3, is monotonically increasing with one "exception"
1733 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1734 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1735 /// In either case, the net result is going to eventually invoke SHUFB to
1736 /// permute/shuffle the bytes from V1 and V2.
/// SHUFFLE_MASK is eventually selected as one of the C*D instructions, which
/// generate the control word for byte/halfword/word insertion. This takes
/// care of a single element move from V2 into V1.
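/// For example, on v4i32 the mask (0, 1, 6, 3) is monotonic except for
/// element 6, which inserts V2's element 2 into slot 2 of V1; a single
/// CWD-generated control word handles it.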
/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
1743 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1744 SDValue V1 = Op.getOperand(0);
1745 SDValue V2 = Op.getOperand(1);
1746 SDValue PermMask = Op.getOperand(2);
1748 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1750 // If we have a single element being moved from V1 to V2, this can be handled
1751 // using the C*[DX] compute mask instructions, but the vector elements have
1752 // to be monotonically increasing with one exception element.
1753 MVT VecVT = V1.getValueType();
1754 MVT EltVT = VecVT.getVectorElementType();
1755 unsigned EltsFromV2 = 0;
1757 unsigned V2EltIdx0 = 0;
1758 unsigned CurrElt = 0;
1759 unsigned MaxElts = VecVT.getVectorNumElements();
1760 unsigned PrevElt = 0;
1762 bool monotonic = true;
1765 if (EltVT == MVT::i8) {
1767 } else if (EltVT == MVT::i16) {
1769 } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
1771 } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
1774 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1776 for (unsigned i = 0; i != PermMask.getNumOperands(); ++i) {
1777 if (PermMask.getOperand(i).getOpcode() != ISD::UNDEF) {
unsigned SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1781 if (SrcElt >= V2EltIdx0) {
if (++EltsFromV2 <= 1) {
1783 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1785 } else if (CurrElt != SrcElt) {
1793 if (PrevElt > 0 && SrcElt < MaxElts) {
1794 if ((PrevElt == SrcElt - 1)
1795 || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
1802 } else if (PrevElt == 0) {
1803 // First time through, need to keep track of previous element
// This isn't a rotation; it takes elements from vector 2
1813 if (EltsFromV2 == 1 && monotonic) {
1814 // Compute mask and shuffle
1815 MachineFunction &MF = DAG.getMachineFunction();
1816 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1817 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1818 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1819 // Initialize temporary register to 0
1820 SDValue InitTempReg =
1821 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
// Use the register's contents as the index operand of SHUFFLE_MASK:
1823 SDValue ShufMaskOp =
1824 DAG.getNode(SPUISD::SHUFFLE_MASK, MVT::v4i32,
1825 DAG.getTargetConstant(V2Elt, MVT::i32),
1826 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1827 // Use shuffle mask in SHUFB synthetic instruction:
1828 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1829 } else if (rotate) {
1830 int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
1832 return DAG.getNode(SPUISD::ROTBYTES_LEFT, V1.getValueType(),
1833 V1, DAG.getConstant(rotamt, MVT::i16));
// Convert the VECTOR_SHUFFLE mask's input element units to bytes.
1837 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1839 SmallVector<SDValue, 16> ResultMask;
1840 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1842 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1845 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1847 for (unsigned j = 0; j < BytesPerElement; ++j) {
1848 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1853 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1854 &ResultMask[0], ResultMask.size());
1855 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
1859 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1860 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1862 if (Op0.getNode()->getOpcode() == ISD::Constant) {
1863 // For a constant, build the appropriate constant vector, which will
1864 // eventually simplify to a vector register load.
1866 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1867 SmallVector<SDValue, 16> ConstVecValues;
1871 // Create a constant vector:
1872 switch (Op.getValueType().getSimpleVT()) {
1873 default: assert(0 && "Unexpected constant value type in "
1874 "LowerSCALAR_TO_VECTOR");
1875 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1876 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1877 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1878 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1879 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1880 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1883 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1884 for (size_t j = 0; j < n_copies; ++j)
1885 ConstVecValues.push_back(CValue);
1887 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1888 &ConstVecValues[0], ConstVecValues.size());
1890 // Otherwise, copy the value from one register to another:
1891 switch (Op0.getValueType().getSimpleVT()) {
1892 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1899 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1906 static SDValue LowerVectorMUL(SDValue Op, SelectionDAG &DAG) {
1907 switch (Op.getValueType().getSimpleVT()) {
1909 cerr << "CellSPU: Unknown vector multiplication, got "
1910 << Op.getValueType().getMVTString()
1916 SDValue rA = Op.getOperand(0);
1917 SDValue rB = Op.getOperand(1);
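// Decompose each 32-bit product into 16-bit partial products: writing
// a = (aH << 16) | aL and b likewise,
//   a*b mod 2^32 == ((aH*bL + bH*aL) << 16) + aL*bL
// where mpyu computes aL*bL and mpyh(a,b) computes (aH*bL) << 16, so the
// three-term sum below yields the full 32-bit product.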
1918 SDValue HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1919 SDValue HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1920 SDValue LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1921 SDValue Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1923 return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1927 // Multiply two v8i16 vectors (pipeline friendly version):
1928 // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1929 // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1930 // c) Use SELB to select upper and lower halves from the intermediate results
1932 // NOTE: We really want to move the SELECT_MASK to earlier to actually get the
1933 // dual-issue. This code does manage to do this, even if it's a little on
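// (FSMBI expands each bit of its 16-bit immediate into a 0x00/0xFF mask
// byte; 0xcccc = 1100110011001100 gives FF,FF,00,00 per word, so the SELB
// below takes the high halfword of each word from the shifted high-half
// product and the low halfword from the low-half product.)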
1936 MachineFunction &MF = DAG.getMachineFunction();
1937 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1938 SDValue Chain = Op.getOperand(0);
1939 SDValue rA = Op.getOperand(0);
1940 SDValue rB = Op.getOperand(1);
1941 unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1942 unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1945 DAG.getCopyToReg(Chain, FSMBIreg,
1946 DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1947 DAG.getConstant(0xcccc, MVT::i16)));
1950 DAG.getCopyToReg(FSMBOp, HiProdReg,
1951 DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1953 SDValue HHProd_v4i32 =
1954 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1955 DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
1957 return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1958 DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1959 DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1960 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1962 DAG.getConstant(16, MVT::i16))),
1963 DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1966 // This M00sE is N@stI! (apologies to Monty Python)
1968 // SPU doesn't know how to do any 8-bit multiplication, so the solution
1969 // is to break it all apart, sign extend, and reassemble the various
1970 // intermediate products.
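// The trick that makes this work: a 16-bit multiply still produces the
// correct 8-bit product in its low byte, since x*y mod 2^8 depends only on
// the low bytes of x and y. Each byte lane is therefore handled by a
// halfword multiply plus shifts, and the lanes are merged back together
// with SELB masks and a final OR.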
1972 SDValue rA = Op.getOperand(0);
1973 SDValue rB = Op.getOperand(1);
1974 SDValue c8 = DAG.getConstant(8, MVT::i32);
1975 SDValue c16 = DAG.getConstant(16, MVT::i32);
1978 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1979 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1980 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
1982 SDValue rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1984 SDValue rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1987 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1988 DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1990 SDValue FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1991 DAG.getConstant(0x2222, MVT::i16));
1993 SDValue LoProdParts =
1994 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1995 DAG.getNode(SPUISD::SELB, MVT::v8i16,
1996 LLProd, LHProd, FSMBmask));
1998 SDValue LoProdMask = DAG.getConstant(0xffff, MVT::i32);
2001 DAG.getNode(ISD::AND, MVT::v4i32,
2003 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2004 LoProdMask, LoProdMask,
2005 LoProdMask, LoProdMask));
2008 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
2009 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
2012 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
2013 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
2016 DAG.getNode(SPUISD::MPY, MVT::v8i16,
2017 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
2018 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
2021 DAG.getNode(SPUISD::MPY, MVT::v8i16,
2022 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2023 DAG.getNode(SPUISD::VEC_SRA,
2024 MVT::v4i32, rAH, c8)),
2025 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2026 DAG.getNode(SPUISD::VEC_SRA,
2027 MVT::v4i32, rBH, c8)));
2030 DAG.getNode(SPUISD::SELB, MVT::v8i16,
2032 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
2036 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);
2038 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2039 DAG.getNode(ISD::OR, MVT::v4i32,
2047 static SDValue LowerFDIVf32(SDValue Op, SelectionDAG &DAG) {
2048 MachineFunction &MF = DAG.getMachineFunction();
2049 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2051 SDValue A = Op.getOperand(0);
2052 SDValue B = Op.getOperand(1);
2053 MVT VT = Op.getValueType();
2055 unsigned VRegBR, VRegC;
2057 if (VT == MVT::f32) {
2058 VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2059 VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2061 VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2062 VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2064 // TODO: make sure we're feeding FPInterp the right arguments
2065 // Right now: fi B, frest(B)
2068 // (Floating Interpolate (FP Reciprocal Estimate B))
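// Mathematically this is one Newton-Raphson refinement step: with
// r ~= 1/B from frest/fi, and q0 = A*r, the returned value is
// q0 + r*(A - B*q0). If r = (1 - e)/B, the result is A*(1 - e^2)/B,
// so the relative error is squared.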
2070 DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2071 DAG.getNode(SPUISD::FPInterp, VT, B,
2072 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2074 // Computes A * BRcpl and stores in a temporary register
2076 DAG.getCopyToReg(BRcpl, VRegC,
2077 DAG.getNode(ISD::FMUL, VT, A,
2078 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
// What does the Chain variable do? It's magic!
2080 // TODO: set Chain = Op(0).getEntryNode()
2082 return DAG.getNode(ISD::FADD, VT,
2083 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2084 DAG.getNode(ISD::FMUL, VT,
2085 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2086 DAG.getNode(ISD::FSUB, VT, A,
2087 DAG.getNode(ISD::FMUL, VT, B,
2088 DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
2091 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2092 MVT VT = Op.getValueType();
2093 SDValue N = Op.getOperand(0);
2094 SDValue Elt = Op.getOperand(1);
2097 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
2098 // Constant argument:
2099 int EltNo = (int) C->getZExtValue();
2102 if (VT == MVT::i8 && EltNo >= 16)
2103 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2104 else if (VT == MVT::i16 && EltNo >= 8)
2105 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
else if (VT == MVT::i32 && EltNo >= 4)
assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
else if (VT == MVT::i64 && EltNo >= 2)
assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");
2111 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2112 // i32 and i64: Element 0 is the preferred slot
2113 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, N);
2116 // Need to generate shuffle mask and extract:
2117 int prefslot_begin = -1, prefslot_end = -1;
2118 int elt_byte = EltNo * VT.getSizeInBits() / 8;
2120 switch (VT.getSimpleVT()) {
2122 assert(false && "Invalid value type!");
2124 prefslot_begin = prefslot_end = 3;
2128 prefslot_begin = 2; prefslot_end = 3;
2133 prefslot_begin = 0; prefslot_end = 3;
2138 prefslot_begin = 0; prefslot_end = 7;
2143 assert(prefslot_begin != -1 && prefslot_end != -1 &&
2144 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
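// Example: extracting element 2 of a v4i32 gives elt_byte == 8 with
// preferred slot bytes 0-3, so ShufBytes becomes 8,9,10,11 repeated across
// the quadword; SHUFB replicates source bytes 8-11 and VEC2PREFSLOT then
// reads the element from the preferred slot.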
2146 unsigned int ShufBytes[16];
2147 for (int i = 0; i < 16; ++i) {
// zero fill upper part of preferred slot, don't care about the
2150 unsigned int mask_val;
2151 if (i <= prefslot_end) {
2153 ((i < prefslot_begin)
2155 : elt_byte + (i - prefslot_begin));
2157 ShufBytes[i] = mask_val;
2159 ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
2162 SDValue ShufMask[4];
2163 for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
2164 unsigned bidx = i * 4;
2165 unsigned int bits = ((ShufBytes[bidx] << 24) |
2166 (ShufBytes[bidx+1] << 16) |
2167 (ShufBytes[bidx+2] << 8) |
2169 ShufMask[i] = DAG.getConstant(bits, MVT::i32);
2172 SDValue ShufMaskVec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2174 sizeof(ShufMask) / sizeof(ShufMask[0]));
2176 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2177 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2178 N, N, ShufMaskVec));
2180 // Variable index: Rotate the requested element into slot 0, then replicate
2181 // slot 0 across the vector
2182 MVT VecVT = N.getValueType();
2183 if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
2184 cerr << "LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit vector type!\n";
2188 // Make life easier by making sure the index is zero-extended to i32
2189 if (Elt.getValueType() != MVT::i32)
2190 Elt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Elt);
2192 // Scale the index to a bit/byte shift quantity
2194 APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
2195 unsigned scaleShift = scaleFactor.logBase2();
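// (e.g., for v8i16 the scale factor is 16/8 == 2, so scaleShift == 1 and
// the element index is doubled to obtain the byte shift amount)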
2198 if (scaleShift > 0) {
2199 // Scale the shift factor:
2200 Elt = DAG.getNode(ISD::SHL, MVT::i32, Elt,
2201 DAG.getConstant(scaleShift, MVT::i32));
2204 vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT, N, Elt);
2206 // Replicate the bytes starting at byte 0 across the entire vector (for
2207 // consistency with the notion of a unified register set)
2210 switch (VT.getSimpleVT()) {
cerr << "LowerEXTRACT_VECTOR_ELT(variable): Unhandled vector type\n";
2216 SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
2217 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2222 SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2223 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2229 SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2230 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2236 SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2237 SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2238 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, loFactor, hiFactor,
2239 loFactor, hiFactor);
2244 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2245 DAG.getNode(SPUISD::SHUFB, VecVT,
2246 vecShift, vecShift, replicate));
2252 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2253 SDValue VecOp = Op.getOperand(0);
2254 SDValue ValOp = Op.getOperand(1);
2255 SDValue IdxOp = Op.getOperand(2);
2256 MVT VT = Op.getValueType();
2258 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2259 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2261 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2262 // Use $sp ($1) because it's always 16-byte aligned and it's available:
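// The C*D instructions derive a SHUFB insertion control word from the low
// four bits of an address operand; since $sp is 16-byte aligned, $sp plus
// the element index leaves exactly the insertion byte offset in those bits.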
2263 SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
2264 DAG.getRegister(SPU::R1, PtrVT),
2265 DAG.getConstant(CN->getSExtValue(), PtrVT));
2266 SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, VT, Pointer);
2269 DAG.getNode(SPUISD::SHUFB, VT,
2270 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2272 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, ShufMask));
2277 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2279 SDValue N0 = Op.getOperand(0); // Everything has at least one operand
2281 assert(Op.getValueType() == MVT::i8);
2284 assert(0 && "Unhandled i8 math operator");
2288 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2290 SDValue N1 = Op.getOperand(1);
2291 N0 = (N0.getOpcode() != ISD::Constant
2292 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2293 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2295 N1 = (N1.getOpcode() != ISD::Constant
2296 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2297 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2299 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2300 DAG.getNode(Opc, MVT::i16, N0, N1));
2304 SDValue N1 = Op.getOperand(1);
2306 N0 = (N0.getOpcode() != ISD::Constant
2307 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2308 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2310 N1Opc = N1.getValueType().bitsLT(MVT::i32)
2313 N1 = (N1.getOpcode() != ISD::Constant
2314 ? DAG.getNode(N1Opc, MVT::i32, N1)
2315 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2318 DAG.getNode(ISD::OR, MVT::i16, N0,
2319 DAG.getNode(ISD::SHL, MVT::i16,
2320 N0, DAG.getConstant(8, MVT::i32)));
2321 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2322 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
2326 SDValue N1 = Op.getOperand(1);
2328 N0 = (N0.getOpcode() != ISD::Constant
2329 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2330 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2332 N1Opc = N1.getValueType().bitsLT(MVT::i16)
2335 N1 = (N1.getOpcode() != ISD::Constant
2336 ? DAG.getNode(N1Opc, MVT::i16, N1)
2337 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2339 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2340 DAG.getNode(Opc, MVT::i16, N0, N1));
2343 SDValue N1 = Op.getOperand(1);
2345 N0 = (N0.getOpcode() != ISD::Constant
2346 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2347 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2349 N1Opc = N1.getValueType().bitsLT(MVT::i16)
2352 N1 = (N1.getOpcode() != ISD::Constant
2353 ? DAG.getNode(N1Opc, MVT::i16, N1)
2354 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2356 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2357 DAG.getNode(Opc, MVT::i16, N0, N1));
2360 SDValue N1 = Op.getOperand(1);
2362 N0 = (N0.getOpcode() != ISD::Constant
2363 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2364 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2366 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2367 N1 = (N1.getOpcode() != ISD::Constant
2368 ? DAG.getNode(N1Opc, MVT::i16, N1)
2369 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2371 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2372 DAG.getNode(Opc, MVT::i16, N0, N1));
2380 static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2382 MVT VT = Op.getValueType();
2383 MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2385 SDValue Op0 = Op.getOperand(0);
2388 case ISD::ZERO_EXTEND:
2389 case ISD::SIGN_EXTEND:
2390 case ISD::ANY_EXTEND: {
2391 MVT Op0VT = Op0.getValueType();
2392 MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2394 assert(Op0VT == MVT::i32
2395 && "CellSPU: Zero/sign extending something other than i32");
2397 DEBUG(cerr << "CellSPU.LowerI64Math: lowering zero/sign/any extend\n");
2399 SDValue PromoteScalar =
2400 DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
2402 if (Opc != ISD::SIGN_EXTEND) {
2403 // Use a shuffle to zero extend the i32 to i64 directly:
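// Mask words: 0x80808080 control bytes produce 0x00 (SHUFB special
// encoding), while 0x00010203 and 0x08090a0b copy a source word, so each
// destination doubleword becomes { 0, scalar }, i.e. a zero-extended i64.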
2405 DAG.getNode(ISD::BUILD_VECTOR, Op0VecVT,
2406 DAG.getConstant(0x80808080, MVT::i32),
2407 DAG.getConstant(0x00010203, MVT::i32),
2408 DAG.getConstant(0x80808080, MVT::i32),
2409 DAG.getConstant(0x08090a0b, MVT::i32));
2410 SDValue zextShuffle =
2411 DAG.getNode(SPUISD::SHUFB, Op0VecVT,
2412 PromoteScalar, PromoteScalar, shufMask);
2414 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2415 DAG.getNode(ISD::BIT_CONVERT, VecVT, zextShuffle));
2417 // SPU has no "rotate quadword and replicate bit 0" (i.e. rotate/shift
2418 // right and propagate the sign bit) instruction.
2420 DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, Op0VecVT,
2421 PromoteScalar, DAG.getConstant(4, MVT::i32));
2423 DAG.getNode(SPUISD::VEC_SRA, Op0VecVT,
2424 PromoteScalar, DAG.getConstant(32, MVT::i32));
2426 DAG.getNode(SPUISD::SELECT_MASK, Op0VecVT,
2427 DAG.getConstant(0xf0f0, MVT::i16));
2428 SDValue CombineQuad =
2429 DAG.getNode(SPUISD::SELB, Op0VecVT,
2430 SignQuad, RotQuad, SelMask);
2432 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2433 DAG.getNode(ISD::BIT_CONVERT, VecVT, CombineQuad));
2438 // Turn operands into vectors to satisfy type checking (shufb works on
2441 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2443 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2444 SmallVector<SDValue, 16> ShufBytes;
// Create the shuffle mask for "rotating" the carry up one register slot
// once the carry is generated.
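// Word-wise the mask is (0x04050607, 0x80808080, 0x0c0d0e0f, 0x80808080):
// the high word of each doubleword receives the carry that CARRY_GENERATE
// left in the low word, and the 0x80 control bytes inject a zero (no
// carry-in) into the low words for ADD_EXTENDED.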
2448 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2449 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2450 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2451 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2454 DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
2455 SDValue ShiftedCarry =
2456 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2458 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2459 &ShufBytes[0], ShufBytes.size()));
2461 return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
2462 DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
2463 Op0, Op1, ShiftedCarry));
2467 // Turn operands into vectors to satisfy type checking (shufb works on
2470 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2472 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2473 SmallVector<SDValue, 16> ShufBytes;
2475 // Create the shuffle mask for "rotating" the borrow up one register slot
2476 // once the borrow is generated.
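// Unlike the add case, the filler control bytes are 0xc0, which SHUFB
// expands to 0xFF: BORROW_GENERATE produces 1 to mean "no borrow", so the
// low word of each doubleword gets an all-ones word (borrow-in of 1, i.e.
// no borrow) for SUB_EXTENDED.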
2477 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2478 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2479 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2480 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2483 DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
2484 SDValue ShiftedBorrow =
2485 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2486 BorrowGen, BorrowGen,
2487 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2488 &ShufBytes[0], ShufBytes.size()));
2490 return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
2491 DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
2492 Op0, Op1, ShiftedBorrow));
2496 SDValue ShiftAmt = Op.getOperand(1);
2497 MVT ShiftAmtVT = ShiftAmt.getValueType();
2498 SDValue Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0);
2500 DAG.getNode(SPUISD::SELB, VecVT,
2502 DAG.getConstant(0, VecVT),
2503 DAG.getNode(SPUISD::SELECT_MASK, VecVT,
2504 DAG.getConstant(0xff00ULL, MVT::i16)));
2505 SDValue ShiftAmtBytes =
2506 DAG.getNode(ISD::SRL, ShiftAmtVT,
2508 DAG.getConstant(3, ShiftAmtVT));
2509 SDValue ShiftAmtBits =
2510 DAG.getNode(ISD::AND, ShiftAmtVT,
2512 DAG.getConstant(7, ShiftAmtVT));
2514 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2515 DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
2516 DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
2517 MaskLower, ShiftAmtBytes),
2522 MVT VT = Op.getValueType();
2523 SDValue ShiftAmt = Op.getOperand(1);
2524 MVT ShiftAmtVT = ShiftAmt.getValueType();
2525 SDValue ShiftAmtBytes =
2526 DAG.getNode(ISD::SRL, ShiftAmtVT,
2528 DAG.getConstant(3, ShiftAmtVT));
2529 SDValue ShiftAmtBits =
2530 DAG.getNode(ISD::AND, ShiftAmtVT,
2532 DAG.getConstant(7, ShiftAmtVT));
2534 return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
2535 DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
2536 Op0, ShiftAmtBytes),
2541 // Promote Op0 to vector
2543 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2544 SDValue ShiftAmt = Op.getOperand(1);
2545 MVT ShiftVT = ShiftAmt.getValueType();
2547 // Negate variable shift amounts
2548 if (!isa<ConstantSDNode>(ShiftAmt)) {
2549 ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT,
2550 DAG.getConstant(0, ShiftVT), ShiftAmt);
2553 SDValue UpperHalfSign =
2554 DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i32,
2555 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
2556 DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
2557 Op0, DAG.getConstant(31, MVT::i32))));
2558 SDValue UpperHalfSignMask =
2559 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign);
2560 SDValue UpperLowerMask =
2561 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64,
2562 DAG.getConstant(0xff00, MVT::i16));
2563 SDValue UpperLowerSelect =
2564 DAG.getNode(SPUISD::SELB, MVT::v2i64,
2565 UpperHalfSignMask, Op0, UpperLowerMask);
2566 SDValue RotateLeftBytes =
2567 DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64,
2568 UpperLowerSelect, ShiftAmt);
2569 SDValue RotateLeftBits =
2570 DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
2571 RotateLeftBytes, ShiftAmt);
2573 return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
2581 //! Lower byte immediate operations for v16i8 vectors:
2583 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2586 MVT VT = Op.getValueType();
2588 ConstVec = Op.getOperand(0);
2589 Arg = Op.getOperand(1);
2590 if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2591 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2592 ConstVec = ConstVec.getOperand(0);
2594 ConstVec = Op.getOperand(1);
2595 Arg = Op.getOperand(0);
2596 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2597 ConstVec = ConstVec.getOperand(0);
2602 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2603 uint64_t VectorBits[2];
2604 uint64_t UndefBits[2];
2605 uint64_t SplatBits, SplatUndef;
2608 if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
2609 && isConstantSplat(VectorBits, UndefBits,
2610 VT.getVectorElementType().getSizeInBits(),
2611 SplatBits, SplatUndef, SplatSize)) {
2613 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2614 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2616 // Turn the BUILD_VECTOR into a set of target constants:
2617 for (size_t i = 0; i < tcVecSize; ++i)
2620 return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
2621 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
// These operations (AND, OR, XOR) are legal; they just couldn't be custom
// lowered. Return the operation, rather than a null SDValue.
2629 //! Lower i32 multiplication
2630 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG, MVT VT,
2632 switch (VT.getSimpleVT()) {
2634 cerr << "CellSPU: Unknown LowerMUL value type, got "
2635 << Op.getValueType().getMVTString()
2641 SDValue rA = Op.getOperand(0);
2642 SDValue rB = Op.getOperand(1);
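// Same decomposition as the v4i32 vector case:
//   a*b mod 2^32 == ((aH*bL + bH*aL) << 16) + aL*bL
// with mpyh supplying the shifted cross terms and mpyu the low product.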
2644 return DAG.getNode(ISD::ADD, MVT::i32,
2645 DAG.getNode(ISD::ADD, MVT::i32,
2646 DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2647 DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2648 DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2655 //! Custom lowering for CTPOP (count population)
Custom lowering code that counts the number of ones in the input
2658 operand. SPU has such an instruction, but it counts the number of
2659 ones per byte, which then have to be accumulated.
2661 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2662 MVT VT = Op.getValueType();
2663 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2665 switch (VT.getSimpleVT()) {
2667 assert(false && "Invalid value type!");
2669 SDValue N = Op.getOperand(0);
2670 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2672 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2673 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2675 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
2679 MachineFunction &MF = DAG.getMachineFunction();
2680 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2682 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2684 SDValue N = Op.getOperand(0);
2685 SDValue Elt0 = DAG.getConstant(0, MVT::i16);
// An i16 popcount can be as large as 16, which needs 5 bits, hence 0x1f:
SDValue Mask0 = DAG.getConstant(0x1f, MVT::i16);
2687 SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2689 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2690 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
// CNTB_result becomes the chain to which the virtual register
// CNTB_reg becomes associated:
2694 SDValue CNTB_result =
2695 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2697 SDValue CNTB_rescopy =
2698 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2700 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2702 return DAG.getNode(ISD::AND, MVT::i16,
2703 DAG.getNode(ISD::ADD, MVT::i16,
2704 DAG.getNode(ISD::SRL, MVT::i16,
2711 MachineFunction &MF = DAG.getMachineFunction();
2712 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2714 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2715 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2717 SDValue N = Op.getOperand(0);
2718 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2719 SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2720 SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2721 SDValue Shift2 = DAG.getConstant(8, MVT::i32);
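// CNTB leaves four per-byte bit counts packed in the word. Folding the
// word onto itself, first shifted right by 16 and then by 8, accumulates
// all four counts into the low byte; the final AND with 0xff discards the
// stale partial sums above it.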
2723 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2724 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2726 // CNTB_result becomes the chain to which all of the virtual registers
2727 // CNTB_reg, SUM1_reg become associated:
2728 SDValue CNTB_result =
2729 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2731 SDValue CNTB_rescopy =
2732 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2735 DAG.getNode(ISD::SRL, MVT::i32,
2736 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2739 DAG.getNode(ISD::ADD, MVT::i32,
2740 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2742 SDValue Sum1_rescopy =
2743 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2746 DAG.getNode(ISD::SRL, MVT::i32,
2747 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2750 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2751 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2753 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2763 //! Lower ISD::SELECT_CC
ISD::SELECT_CC can (generally) be implemented directly on the SPU using the SELB instruction.
2768 \note Need to revisit this in the future: if the code path through the true
2769 and false value computations is longer than the latency of a branch (6
cycles), then it would be more advantageous to insert a new basic block and
branch on the condition. However, this code does not make that assumption,
given the simplistic uses seen so far.
2775 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
2776 MVT VT = Op.getValueType();
2777 SDValue lhs = Op.getOperand(0);
2778 SDValue rhs = Op.getOperand(1);
2779 SDValue trueval = Op.getOperand(2);
2780 SDValue falseval = Op.getOperand(3);
2781 SDValue condition = Op.getOperand(4);
2783 // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2784 // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2785 // with another "cannot select select_cc" assert:
2787 SDValue compare = DAG.getNode(ISD::SETCC, VT, lhs, rhs, condition);
2788 return DAG.getNode(SPUISD::SELB, VT, trueval, falseval, compare);
2791 //! Custom lower ISD::TRUNCATE
2792 static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
2794 MVT VT = Op.getValueType();
2795 MVT::SimpleValueType simpleVT = VT.getSimpleVT();
2796 MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2798 SDValue Op0 = Op.getOperand(0);
2799 MVT Op0VT = Op0.getValueType();
2800 MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2802 SDValue PromoteScalar = DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
2807 // Create shuffle mask
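// SPU registers are big-endian, so the least-significant piece of a scalar
// occupies the highest-addressed bytes of its slot. Each mask word names
// four source bytes to copy, e.g. 0x0c0d0e0f selects bytes 12-15 (the low
// word of a quadword-sized value).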
2808 switch (Op0VT.getSimpleVT()) {
2812 // least significant doubleword of quadword
2813 maskHigh = 0x08090a0b;
2814 maskLow = 0x0c0d0e0f;
2817 // least significant word of quadword
2818 maskHigh = maskLow = 0x0c0d0e0f;
2821 // least significant halfword of quadword
2822 maskHigh = maskLow = 0x0e0f0e0f;
2825 // least significant byte of quadword
2826 maskHigh = maskLow = 0x0f0f0f0f;
2829 cerr << "Truncation to illegal type!";
2836 // least significant word of doubleword
2837 maskHigh = maskLow = 0x04050607;
2840 // least significant halfword of doubleword
2841 maskHigh = maskLow = 0x06070607;
2844 // least significant byte of doubleword
2845 maskHigh = maskLow = 0x07070707;
2848 cerr << "Truncation to illegal type!";
2856 // least significant halfword of word
2857 maskHigh = maskLow = 0x02030203;
2860 // least significant byte of word/halfword
2861 maskHigh = maskLow = 0x03030303;
2864 cerr << "Truncation to illegal type!";
2869 cerr << "Trying to lower truncation from illegal type!";
2873 // Use a shuffle to perform the truncation
2874 SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2875 DAG.getConstant(maskHigh, MVT::i32),
2876 DAG.getConstant(maskLow, MVT::i32),
2877 DAG.getConstant(maskHigh, MVT::i32),
2878 DAG.getConstant(maskLow, MVT::i32));
2880 SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT,
2881 PromoteScalar, PromoteScalar, shufMask);
2883 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2884 DAG.getNode(ISD::BIT_CONVERT, VecVT, truncShuffle));
2887 //! Custom (target-specific) lowering entry point
This is where LLVM's DAG selection process calls back to perform target-specific lowering of nodes.
2893 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2895 unsigned Opc = (unsigned) Op.getOpcode();
2896 MVT VT = Op.getValueType();
2900 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2901 cerr << "Op.getOpcode() = " << Opc << "\n";
2902 cerr << "*Op.getNode():\n";
2903 Op.getNode()->dump();
2910 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2912 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2913 case ISD::ConstantPool:
2914 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2915 case ISD::GlobalAddress:
2916 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2917 case ISD::JumpTable:
2918 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2920 return LowerConstant(Op, DAG);
2921 case ISD::ConstantFP:
2922 return LowerConstantFP(Op, DAG);
2924 return LowerBRCOND(Op, DAG);
2925 case ISD::FORMAL_ARGUMENTS:
2926 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2928 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2930 return LowerRET(Op, DAG, getTargetMachine());
2933 // i8, i64 math ops:
2934 case ISD::ZERO_EXTEND:
2935 case ISD::SIGN_EXTEND:
2936 case ISD::ANY_EXTEND:
2945 return LowerI8Math(Op, DAG, Opc);
2946 else if (VT == MVT::i64)
2947 return LowerI64Math(Op, DAG, Opc);
2951 // Vector-related lowering.
2952 case ISD::BUILD_VECTOR:
2953 return LowerBUILD_VECTOR(Op, DAG);
2954 case ISD::SCALAR_TO_VECTOR:
2955 return LowerSCALAR_TO_VECTOR(Op, DAG);
2956 case ISD::VECTOR_SHUFFLE:
2957 return LowerVECTOR_SHUFFLE(Op, DAG);
2958 case ISD::EXTRACT_VECTOR_ELT:
2959 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2960 case ISD::INSERT_VECTOR_ELT:
2961 return LowerINSERT_VECTOR_ELT(Op, DAG);
2963 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2967 return LowerByteImmed(Op, DAG);
2969 // Vector and i8 multiply:
2972 return LowerVectorMUL(Op, DAG);
2973 else if (VT == MVT::i8)
2974 return LowerI8Math(Op, DAG, Opc);
2976 return LowerMUL(Op, DAG, VT, Opc);
2979 if (VT == MVT::f32 || VT == MVT::v4f32)
2980 return LowerFDIVf32(Op, DAG);
2982 // This is probably a libcall
2983 else if (Op.getValueType() == MVT::f64)
2984 return LowerFDIVf64(Op, DAG);
2987 assert(0 && "Calling FDIV on unsupported MVT");
2990 return LowerCTPOP(Op, DAG);
2992 case ISD::SELECT_CC:
2993 return LowerSELECT_CC(Op, DAG);
2996 return LowerTRUNCATE(Op, DAG);
3002 void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
3003 SmallVectorImpl<SDValue>&Results,
3007 unsigned Opc = (unsigned) N->getOpcode();
3008 MVT OpVT = N->getValueType(0);
3012 cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
3013 cerr << "Op.getOpcode() = " << Opc << "\n";
3014 cerr << "*Op.getNode():\n";
3022 /* Otherwise, return unchanged */
3025 //===----------------------------------------------------------------------===//
3026 // Target Optimization Hooks
3027 //===----------------------------------------------------------------------===//
3030 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
3033 TargetMachine &TM = getTargetMachine();
3035 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
3036 SelectionDAG &DAG = DCI.DAG;
3037 SDValue Op0 = N->getOperand(0); // everything has at least one operand
3038 MVT NodeVT = N->getValueType(0); // The node's value type
3039 MVT Op0VT = Op0.getValueType(); // The first operand's result
3040 SDValue Result; // Initially, empty result
3042 switch (N->getOpcode()) {
3045 SDValue Op1 = N->getOperand(1);
3047 if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) {
3048 SDValue Op01 = Op0.getOperand(1);
3049 if (Op01.getOpcode() == ISD::Constant
3050 || Op01.getOpcode() == ISD::TargetConstant) {
3051 // (add <const>, (SPUindirect <arg>, <const>)) ->
3052 // (SPUindirect <arg>, <const + const>)
3053 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
3054 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
3055 SDValue combinedConst =
3056 DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(), Op0VT);
3058 #if !defined(NDEBUG)
3059 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
3061 << "Replace: (add " << CN0->getZExtValue() << ", "
3062 << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n"
3063 << "With: (SPUindirect <arg>, "
3064 << CN0->getZExtValue() + CN1->getZExtValue() << ")\n";
3068 return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
3069 Op0.getOperand(0), combinedConst);
3071 } else if (isa<ConstantSDNode>(Op0)
3072 && Op1.getOpcode() == SPUISD::IndirectAddr) {
3073 SDValue Op11 = Op1.getOperand(1);
3074 if (Op11.getOpcode() == ISD::Constant
3075 || Op11.getOpcode() == ISD::TargetConstant) {
3076 // (add (SPUindirect <arg>, <const>), <const>) ->
3077 // (SPUindirect <arg>, <const + const>)
3078 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
3079 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
3080 SDValue combinedConst =
3081 DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(), Op0VT);
3083 DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
3084 << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
3085 DEBUG(cerr << "With: (SPUindirect <arg>, "
3086 << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
3088 return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
3089 Op1.getOperand(0), combinedConst);
3094 case ISD::SIGN_EXTEND:
3095 case ISD::ZERO_EXTEND:
3096 case ISD::ANY_EXTEND: {
3097 if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
3098 // (any_extend (SPUextract_elt0 <arg>)) ->
3099 // (SPUextract_elt0 <arg>)
3100 // Types must match, however...
3101 #if !defined(NDEBUG)
3102 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
3103 cerr << "\nReplace: ";
3106 Op0.getNode()->dump(&DAG);
3115 case SPUISD::IndirectAddr: {
3116 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
3117 ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
3118 if (CN->getZExtValue() == 0) {
3119 // (SPUindirect (SPUaform <addr>, 0), 0) ->
3120 // (SPUaform <addr>, 0)
3122 DEBUG(cerr << "Replace: ");
3123 DEBUG(N->dump(&DAG));
3124 DEBUG(cerr << "\nWith: ");
3125 DEBUG(Op0.getNode()->dump(&DAG));
3126 DEBUG(cerr << "\n");
3133 case SPUISD::SHLQUAD_L_BITS:
3134 case SPUISD::SHLQUAD_L_BYTES:
3135 case SPUISD::VEC_SHL:
3136 case SPUISD::VEC_SRL:
3137 case SPUISD::VEC_SRA:
3138 case SPUISD::ROTQUAD_RZ_BYTES:
3139 case SPUISD::ROTQUAD_RZ_BITS: {
3140 SDValue Op1 = N->getOperand(1);
3142 if (isa<ConstantSDNode>(Op1)) {
3143 // Kill degenerate vector shifts:
3144 ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
3145 if (CN->getZExtValue() == 0) {
3151 case SPUISD::PROMOTE_SCALAR: {
3152 switch (Op0.getOpcode()) {
3155 case ISD::ANY_EXTEND:
3156 case ISD::ZERO_EXTEND:
3157 case ISD::SIGN_EXTEND: {
3158 // (SPUpromote_scalar (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
3160 // but only if the SPUpromote_scalar and <arg> types match.
3161 SDValue Op00 = Op0.getOperand(0);
3162 if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
3163 SDValue Op000 = Op00.getOperand(0);
3164 if (Op000.getValueType() == NodeVT) {
3170 case SPUISD::VEC2PREFSLOT: {
3171 // (SPUpromote_scalar (SPUvec2prefslot <arg>)) ->
3173 Result = Op0.getOperand(0);
3180 // Otherwise, return unchanged.
3182 if (Result.getNode()) {
3183 DEBUG(cerr << "\nReplace.SPU: ");
3184 DEBUG(N->dump(&DAG));
3185 DEBUG(cerr << "\nWith: ");
3186 DEBUG(Result.getNode()->dump(&DAG));
3187 DEBUG(cerr << "\n");
3194 //===----------------------------------------------------------------------===//
3195 // Inline Assembly Support
3196 //===----------------------------------------------------------------------===//
3198 /// getConstraintType - Given a constraint letter, return the type of
3199 /// constraint it is for this target.
3200 SPUTargetLowering::ConstraintType
3201 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
3202 if (ConstraintLetter.size() == 1) {
3203 switch (ConstraintLetter[0]) {
3210 return C_RegisterClass;
3213 return TargetLowering::getConstraintType(ConstraintLetter);
3216 std::pair<unsigned, const TargetRegisterClass*>
3217 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
3220 if (Constraint.size() == 1) {
// GCC constraint letters (modeled on the RS6000/PowerPC port)
3222 switch (Constraint[0]) {
3226 return std::make_pair(0U, SPU::R64CRegisterClass);
3227 return std::make_pair(0U, SPU::R32CRegisterClass);
3230 return std::make_pair(0U, SPU::R32FPRegisterClass);
3231 else if (VT == MVT::f64)
3232 return std::make_pair(0U, SPU::R64FPRegisterClass);
3235 return std::make_pair(0U, SPU::GPRCRegisterClass);
3239 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
3242 //! Compute used/known bits for a SPU operand
3244 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
3248 const SelectionDAG &DAG,
3249 unsigned Depth ) const {
3251 const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
3254 switch (Op.getOpcode()) {
3256 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
3266 case SPUISD::PROMOTE_SCALAR: {
3267 SDValue Op0 = Op.getOperand(0);
3268 MVT Op0VT = Op0.getValueType();
3269 unsigned Op0VTBits = Op0VT.getSizeInBits();
3270 uint64_t InMask = Op0VT.getIntegerVTBitMask();
3271 KnownZero |= APInt(Op0VTBits, ~InMask, false);
3272 KnownOne |= APInt(Op0VTBits, InMask, false);
3276 case SPUISD::LDRESULT:
3277 case SPUISD::VEC2PREFSLOT: {
3278 MVT OpVT = Op.getValueType();
3279 unsigned OpVTBits = OpVT.getSizeInBits();
3280 uint64_t InMask = OpVT.getIntegerVTBitMask();
3281 KnownZero |= APInt(OpVTBits, ~InMask, false);
3282 KnownOne |= APInt(OpVTBits, InMask, false);
3291 case SPUISD::SHLQUAD_L_BITS:
3292 case SPUISD::SHLQUAD_L_BYTES:
3293 case SPUISD::VEC_SHL:
3294 case SPUISD::VEC_SRL:
3295 case SPUISD::VEC_SRA:
3296 case SPUISD::VEC_ROTL:
3297 case SPUISD::VEC_ROTR:
3298 case SPUISD::ROTQUAD_RZ_BYTES:
3299 case SPUISD::ROTQUAD_RZ_BITS:
3300 case SPUISD::ROTBYTES_LEFT:
3301 case SPUISD::SELECT_MASK:
3303 case SPUISD::FPInterp:
3304 case SPUISD::FPRecipEst:
3305 case SPUISD::SEXT32TO64:
3310 // LowerAsmOperandForConstraint
3312 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3313 char ConstraintLetter,
3315 std::vector<SDValue> &Ops,
3316 SelectionDAG &DAG) const {
3317 // Default, for the time being, to the base class handler
3318 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
3322 /// isLegalAddressImmediate - Return true if the integer value can be used
3323 /// as the offset of the target addressing mode.
3324 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3325 const Type *Ty) const {
// The SPU's local store is 256K (2^18 bytes), so offsets must fit in 18 bits:
3327 return (V > -(1 << 18) && V < (1 << 18) - 1);
3330 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3335 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3336 // The SPU target isn't yet aware of offsets.