1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the SPUTargetLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "SPURegisterNames.h"
15 #include "SPUISelLowering.h"
16 #include "SPUTargetMachine.h"
17 #include "SPUFrameInfo.h"
18 #include "llvm/ADT/VectorExtras.h"
19 #include "llvm/CodeGen/CallingConvLower.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/SelectionDAG.h"
25 #include "llvm/Constants.h"
26 #include "llvm/Function.h"
27 #include "llvm/Intrinsics.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/MathExtras.h"
30 #include "llvm/Target/TargetOptions.h"
// Used in getTargetNodeName() below
// Lazily-populated map from SPUISD opcode to a printable node name.
std::map<unsigned, const char *> node_names;

//! MVT mapping to useful data for Cell SPU
struct valtype_map_s {
  // Byte offset of the value's "preferred slot" within a 16-byte register.
  const int prefslot_byte;

// NOTE(review): table initializers not visible in this view; the lookup in
// getValueTypeMapEntry() also reads a .valtype field -- confirm in full file.
const valtype_map_s valtype_map[] = {

const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
// Find the valtype_map entry for value type VT; returns 0 (null) when VT
// has no entry.  The table is tiny, so a linear scan suffices.
const valtype_map_s *getValueTypeMapEntry(MVT VT) {
  const valtype_map_s *retval = 0;

  for (size_t i = 0; i < n_valtype_map; ++i) {
    if (valtype_map[i].valtype == VT) {
      retval = valtype_map + i;

  // Diagnostic emitted when the lookup fails (debug builds only --
  // presumably inside an #ifndef NDEBUG region not visible here):
  cerr << "getValueTypeMapEntry returns NULL for "
//! Predicate that returns true if operand is a memory target
/*!
  \arg Op Operand to test
  \return true if the operand is a memory target (i.e., global
  address, external symbol, constant pool) or an A-form
 */
bool isMemoryOperand(const SDValue &Op)
  const unsigned Opc = Op.getOpcode();
  // Matches both the generic and Target* flavors of each address node,
  // plus the SPU-specific A-form address.
  return (Opc == ISD::GlobalAddress
          || Opc == ISD::GlobalTLSAddress
          || Opc == ISD::JumpTable
          || Opc == ISD::ConstantPool
          || Opc == ISD::ExternalSymbol
          || Opc == ISD::TargetGlobalAddress
          || Opc == ISD::TargetGlobalTLSAddress
          || Opc == ISD::TargetJumpTable
          || Opc == ISD::TargetConstantPool
          || Opc == ISD::TargetExternalSymbol
          || Opc == SPUISD::AFormAddr);
//! Predicate that returns true if the operand is an indirect target
// (a plain register or the result of an SPU load node).
bool isIndirectOperand(const SDValue &Op)
  const unsigned Opc = Op.getOpcode();
  return (Opc == ISD::Register
          || Opc == SPUISD::LDRESULT);
/// SPUTargetLowering constructor - registers the Cell SPU register classes
/// and configures per-type operation actions (Legal/Custom/Expand/Promote)
/// for instruction selection.
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM),

  // Fold away setcc operations if possible.

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set up the SPU's register classes:
  addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // Initialize libcalls:
  setLibcallName(RTLIB::MUL_I64, "__muldi3");

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);

  setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
  setTruncStoreAction(MVT::i8, MVT::i8, Custom);
  setTruncStoreAction(MVT::i16, MVT::i8, Custom);
  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
  setTruncStoreAction(MVT::i64, MVT::i8, Custom);
  setTruncStoreAction(MVT::i128, MVT::i8, Custom);

  setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);

  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Custom);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::Constant, MVT::i64, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::LOAD, VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);

  // Custom lower BRCOND for i8 to "promote" the result to i16
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);

  // Custom lower SELECT_CC for most cases, but expand by default
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);

  // SPU has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);

  // SPU has no hardware square root; expand FSQRT:
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.

  // FIXME: Change from "expand" to appropriate type once ROTR is supported in
  setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/);

  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8, Custom);

  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);

  // SPU needs custom lowering for shift left/right for i64
  setOperationAction(ISD::SHL, MVT::i64, Custom);
  setOperationAction(ISD::SRL, MVT::i64, Custom);
  setOperationAction(ISD::SRA, MVT::i64, Custom);

  // Custom lower i8, i32 and i64 multiplications
  setOperationAction(ISD::MUL, MVT::i8, Custom);
  setOperationAction(ISD::MUL, MVT::i32, Custom);
  setOperationAction(ISD::MUL, MVT::i64, Expand); // libcall

  // SMUL_LOHI, UMUL_LOHI
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom);
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Custom);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom);

  // Need to custom handle (some) common i8, i64 math ops
  setOperationAction(ISD::ADD, MVT::i64, Custom);
  setOperationAction(ISD::SUB, MVT::i8, Custom);
  setOperationAction(ISD::SUB, MVT::i64, Custom);

  // SPU does not have BSWAP. It does have i32 support CTLZ.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8, Custom);
  setOperationAction(ISD::CTPOP, MVT::i16, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i64, Custom);

  setOperationAction(ISD::CTTZ , MVT::i32, Expand);
  setOperationAction(ISD::CTTZ , MVT::i64, Expand);

  setOperationAction(ISD::CTLZ , MVT::i32, Legal);

  // SPU has a version of select that implements (a&~c)|(b&c), just like
  // select ought to work:
  setOperationAction(ISD::SELECT, MVT::i8, Legal);
  setOperationAction(ISD::SELECT, MVT::i16, Legal);
  setOperationAction(ISD::SELECT, MVT::i32, Legal);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);

  setOperationAction(ISD::SETCC, MVT::i8, Legal);
  setOperationAction(ISD::SETCC, MVT::i16, Legal);
  setOperationAction(ISD::SETCC, MVT::i32, Legal);
  setOperationAction(ISD::SETCC, MVT::i64, Expand);

  // Zero extension and sign extension for i64 have to be
  setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
  setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
  setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);

  // Custom lower truncates
  setOperationAction(ISD::TRUNCATE, MVT::i8, Custom);
  setOperationAction(ISD::TRUNCATE, MVT::i16, Custom);
  setOperationAction(ISD::TRUNCATE, MVT::i32, Custom);
  setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);

  // SPU has a legal FP -> signed INT instruction
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f32, Custom);
  //setOperationAction(ISD::FDIV, MVT::f64, Custom);

  // SPU has [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::JumpTable, VT, Custom);

  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY , MVT::Other, Expand);
  setOperationAction(ISD::VAEND , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT VT = (MVT::SimpleValueType)i;

    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD , VT, Legal);
    setOperationAction(ISD::SUB , VT, Legal);
    // mul has to be custom lowered.
    setOperationAction(ISD::MUL , VT, Custom);

    setOperationAction(ISD::AND , VT, Legal);
    setOperationAction(ISD::OR , VT, Legal);
    setOperationAction(ISD::XOR , VT, Legal);
    setOperationAction(ISD::LOAD , VT, Legal);
    setOperationAction(ISD::SELECT, VT, Legal);
    setOperationAction(ISD::STORE, VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);
    setOperationAction(ISD::FDIV, VT, Custom);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);

  // v16i8 logical/multiply ops get their own custom lowering:
  setOperationAction(ISD::MUL, MVT::v16i8, Custom);
  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR, MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setShiftAmountType(MVT::i32);
  setBooleanContents(ZeroOrOneBooleanContent);

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  computeRegisterProperties();

  // Set other properties:
  setSchedulingPreference(SchedulingForLatency);
/// getTargetNodeName - Return a printable name for the given SPUISD
/// target-specific DAG node opcode, or null for an unknown opcode.  The
/// node_names table is populated lazily on the first call.
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
    node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
    node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
    node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
    node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
    node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
      "SPUISD::ROTQUAD_RZ_BYTES";
    node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
      "SPUISD::ROTQUAD_RZ_BITS";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
      "SPUISD::ROTBYTES_LEFT_BITS";
    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
    node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
    node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
    node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
    node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
    node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
    node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
// SETCC on SPU yields a result of the same integer type as its operand;
// for non-integer operands the comparison result is an i32.
MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const {
  MVT VT = Op.getValueType();
  return (VT.isInteger() ? VT : MVT(MVT::i32));
478 //===----------------------------------------------------------------------===//
479 // Calling convention code:
480 //===----------------------------------------------------------------------===//
482 #include "SPUGenCallingConv.inc"
484 //===----------------------------------------------------------------------===//
485 // LowerOperation implementation
486 //===----------------------------------------------------------------------===//
488 /// Aligned load common code for CellSPU
490 \param[in] Op The SelectionDAG load or store operand
491 \param[in] DAG The selection DAG
492 \param[in] ST CellSPU subtarget information structure
493 \param[in,out] alignment Caller initializes this to the load or store node's
494 value from getAlignment(), may be updated while generating the aligned load
495 \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
496 offset (divisible by 16, modulo 16 == 0)
497 \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
498 offset of the preferred slot (modulo 16 != 0)
\param[in,out] VT Caller initializes this value type to the load or store
500 node's loaded or stored value type; may be updated if an i1-extended load or
502 \param[out] was16aligned true if the base pointer had 16-byte alignment,
503 otherwise false. Can help to determine if the chunk needs to be rotated.
505 Both load and store lowering load a block of data aligned on a 16-byte
506 boundary. This is the common aligned load code shared between both.
// Common helper for load/store lowering: emit a v16i8 load of the aligned
// 16-byte chunk containing the target address, updating alignment /
// alignOffs / prefSlotOffs / was16aligned for the caller (see the doc
// comment above for each out-parameter's meaning).
// NOTE(review): an LSN (load/store node) parameter appears to be elided
// from this view -- the body reads LSN->getBasePtr()/getChain().
AlignedLoad(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST,
            unsigned &alignment, int &alignOffs, int &prefSlotOffs,
            MVT &VT, bool &was16aligned)
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  const valtype_map_s *vtm = getValueTypeMapEntry(VT);
  SDValue basePtr = LSN->getBasePtr();
  SDValue chain = LSN->getChain();

  // Case 1: base + constant offset.  Peel off the constant so the base can
  // be realigned, and remember where the value sits within the chunk.
  if (basePtr.getOpcode() == ISD::ADD) {
    SDValue Op1 = basePtr.getNode()->getOperand(1);

    if (Op1.getOpcode() == ISD::Constant
        || Op1.getOpcode() == ISD::TargetConstant) {
      const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));

      alignOffs = (int) CN->getZExtValue();
      prefSlotOffs = (int) (alignOffs & 0xf);

      // Adjust the rotation amount to ensure that the final result ends up in
      // the preferred slot:
      prefSlotOffs -= vtm->prefslot_byte;
      basePtr = basePtr.getOperand(0);

      // Loading from memory, can we adjust alignment?
      if (basePtr.getOpcode() == SPUISD::AFormAddr) {
        SDValue APtr = basePtr.getOperand(0);
        if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
          GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
          alignment = GSDN->getGlobal()->getAlignment();
      prefSlotOffs = -vtm->prefslot_byte;
  // Case 2: frame index -- address is an offset from the stack pointer (R1),
  // which is always 16-byte aligned.
  } else if (basePtr.getOpcode() == ISD::FrameIndex) {
    FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(basePtr);
    alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
    prefSlotOffs = (int) (alignOffs & 0xf);
    prefSlotOffs -= vtm->prefslot_byte;
    basePtr = DAG.getRegister(SPU::R1, VT);
    prefSlotOffs = -vtm->prefslot_byte;

  if (alignment == 16) {
    // Realign the base pointer as a D-Form address:
    if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
      basePtr = DAG.getNode(ISD::ADD, PtrVT,
                            DAG.getConstant((alignOffs & ~0xf), PtrVT));

    // Emit the vector load:
    return DAG.getLoad(MVT::v16i8, chain, basePtr,
                       LSN->getSrcValue(), LSN->getSrcValueOffset(),
                       LSN->isVolatile(), 16);

  // Unaligned load or we're using the "large memory" model, which means that
  // we have to be very pessimistic:
  if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
    basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr,
                          DAG.getConstant(0, PtrVT));

  // Round the base down to the containing 16-byte boundary.
  basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
                        DAG.getConstant((alignOffs & ~0xf), PtrVT));
  was16aligned = false;
  return DAG.getLoad(MVT::v16i8, chain, basePtr,
                     LSN->getSrcValue(), LSN->getSrcValueOffset(),
                     LSN->isVolatile(), 16);
588 /// Custom lower loads for CellSPU
590 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
591 within a 16-byte block, we have to rotate to extract the requested element.
593 For extending loads, we also want to ensure that the following sequence is
594 emitted, e.g. for MVT::f32 extending load to MVT::f64:
598 %2 v16i8,ch = rotate %1
%3 v4f32,ch = bitconvert %2
600 %4 f32 = vec2perfslot %3
601 %5 f64 = fp_extend %4
// Custom lowering for LOAD: fetch the aligned 16-byte chunk, rotate the
// requested element into the preferred slot, convert to the scalar type,
// then apply any sign/zero/any/fp extension the original load requested.
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDValue the_chain = LN->getChain();
  MVT InVT = LN->getMemoryVT();
  MVT OutVT = Op.getValueType();
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    // Pull in the containing 16-byte chunk; offset/rotamt/was16aligned are
    // filled in by AlignedLoad (declarations elided in this view).
    AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, InVT,

    if (result.getNode() == 0)

    the_chain = result.getValue(1);
    // Rotate the chunk if necessary
    if (rotamt != 0 || !was16aligned) {
      SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
        // 16-byte aligned case: rotate by a constant byte count.
        Ops[1] = DAG.getConstant(rotamt, MVT::i16);
        // Otherwise rotate by (basePtr + rotamt), computed at runtime.
        MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
        LoadSDNode *LN1 = cast<LoadSDNode>(result);
        Ops[1] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
                             DAG.getConstant(rotamt, PtrVT));

      result = DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v16i8, Ops, 2);

    // Convert the loaded v16i8 vector to the appropriate vector type
    // specified by the operand:
    MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
    result = DAG.getNode(SPUISD::VEC2PREFSLOT, InVT,
                         DAG.getNode(ISD::BIT_CONVERT, vecVT, result));

    // Handle extending loads by extending the scalar result:
    if (ExtType == ISD::SEXTLOAD) {
      result = DAG.getNode(ISD::SIGN_EXTEND, OutVT, result);
    } else if (ExtType == ISD::ZEXTLOAD) {
      result = DAG.getNode(ISD::ZERO_EXTEND, OutVT, result);
    } else if (ExtType == ISD::EXTLOAD) {
      unsigned NewOpc = ISD::ANY_EXTEND;

      if (OutVT.isFloatingPoint())
        NewOpc = ISD::FP_EXTEND;

      result = DAG.getNode(NewOpc, OutVT, result);

    // Wrap the value and chain in an LDRESULT node so both are returned.
    SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
    SDValue retops[2] = {

    result = DAG.getNode(SPUISD::LDRESULT, retvts,
                         retops, sizeof(retops) / sizeof(retops[0]));

  // Pre-/post-indexed addressing modes are not supported:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
    cerr << (unsigned) LN->getAddressingMode() << "\n";
690 /// Custom lower stores for CellSPU
692 All CellSPU stores are aligned to 16-byte boundaries, so for elements
693 within a 16-byte block, we have to generate a shuffle to insert the
694 requested element into its place, then store the resulting block.
697 LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
698 StoreSDNode *SN = cast<StoreSDNode>(Op);
699 SDValue Value = SN->getValue();
700 MVT VT = Value.getValueType();
701 MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
702 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
703 unsigned alignment = SN->getAlignment();
705 switch (SN->getAddressingMode()) {
706 case ISD::UNINDEXED: {
707 int chunk_offset, slot_offset;
710 // The vector type we really want to load from the 16-byte chunk.
711 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
712 stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
714 SDValue alignLoadVec =
715 AlignedLoad(Op, DAG, ST, SN, alignment,
716 chunk_offset, slot_offset, VT, was16aligned);
718 if (alignLoadVec.getNode() == 0)
721 LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
722 SDValue basePtr = LN->getBasePtr();
723 SDValue the_chain = alignLoadVec.getValue(1);
724 SDValue theValue = SN->getValue();
728 && (theValue.getOpcode() == ISD::AssertZext
729 || theValue.getOpcode() == ISD::AssertSext)) {
730 // Drill down and get the value for zero- and sign-extended
732 theValue = theValue.getOperand(0);
737 SDValue insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
738 SDValue insertEltPtr;
740 // If the base pointer is already a D-form address, then just create
741 // a new D-form address with a slot offset and the orignal base pointer.
742 // Otherwise generate a D-form address with the slot offset relative
743 // to the stack pointer, which is always aligned.
744 DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
745 DEBUG(basePtr.getNode()->dump(&DAG));
748 if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
749 (basePtr.getOpcode() == ISD::ADD
750 && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
751 insertEltPtr = basePtr;
753 insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
756 SDValue insertEltOp =
757 DAG.getNode(SPUISD::SHUFFLE_MASK, vecVT, insertEltPtr);
758 SDValue vectorizeOp =
759 DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue);
761 result = DAG.getNode(SPUISD::SHUFB, vecVT,
762 vectorizeOp, alignLoadVec,
763 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, insertEltOp));
765 result = DAG.getStore(the_chain, result, basePtr,
766 LN->getSrcValue(), LN->getSrcValueOffset(),
767 LN->isVolatile(), LN->getAlignment());
769 #if 0 && defined(NDEBUG)
770 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
771 const SDValue ¤tRoot = DAG.getRoot();
774 cerr << "------- CellSPU:LowerStore result:\n";
777 DAG.setRoot(currentRoot);
788 case ISD::LAST_INDEXED_MODE:
789 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
791 cerr << (unsigned) SN->getAddressingMode() << "\n";
/// Generate the address of a constant pool entry.
// Static relocation model only: small-memory code uses an A-form address,
// large-memory code materializes the address as Hi/Lo halves.
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDValue with the constant pool address in it.
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
    // Large memory model: combine Hi/Lo halves into an indirect address.
    SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
    SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
    return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);

  // Non-static relocation models are rejected (assert text, framing elided):
  "LowerConstantPool: Relocation model other than static"
// Generate the address of a jump table entry.  Same scheme as
// LowerConstantPool: A-form for small memory, Hi/Lo + IndirectAddr for the
// large memory model; only the static relocation model is supported.
LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
    SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
    SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
    return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);

  // Non-static relocation models are rejected (assert framing elided):
  "LowerJumpTable: Relocation model other than static not supported.");
// Generate the address of a global variable.  Same scheme as the constant
// pool / jump table lowerings: A-form for small memory, Hi/Lo + IndirectAddr
// for the large memory model; only the static relocation model is supported.
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  const TargetMachine &TM = DAG.getTarget();
  SDValue Zero = DAG.getConstant(0, PtrVT);

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
    SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
    SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
    return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);

  cerr << "LowerGlobalAddress: Relocation model other than static not "
//! Custom lower i64 integer constants
/*!
  This code inserts all of the necessary juggling that needs to occur to load
  a 64-bit constant into a register.
 */
LowerConstant(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();

  if (VT == MVT::i64) {
    ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
    // Splat the constant into both lanes of a v2i64, then extract the
    // preferred slot as the scalar result.
    SDValue T = DAG.getConstant(CN->getZExtValue(), VT);
    return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));

  // Any other constant type is an internal error:
  cerr << "LowerConstant: unhandled constant type "
//! Custom lower double precision floating point constants
// The f64 bit pattern is splatted into a v2i64, bit-converted to v2f64, and
// the preferred slot extracted as the scalar result.
LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();

  if (VT == MVT::f64) {
    ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());

    "LowerConstantFP: Node is not ConstantFPSDNode");

    // Reinterpret the double's bits as an i64 so it can be splatted:
    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
    SDValue T = DAG.getConstant(dbits, MVT::i64);
    SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T);
    return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
                       DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, Tvec));
//! Lower MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
LowerBRCOND(SDValue Op, SelectionDAG &DAG)
  SDValue Cond = Op.getOperand(1);
  MVT CondVT = Cond.getValueType();

  if (CondVT == MVT::i8) {
    // Zero-extend the i8 condition to the promoted type (CondNVT, declared
    // on a line elided from this view) and rebuild the branch node.
    return DAG.getNode(ISD::BRCOND, Op.getValueType(),
                       DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),

  return SDValue(); // Unchanged
// Lower FORMAL_ARGUMENTS: materialize each incoming argument either from
// the SPU argument registers or from a fixed stack slot, and for varargs
// functions spill the remaining argument registers to the stack so va_arg
// can walk them in memory.
941 LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
943 MachineFunction &MF = DAG.getMachineFunction();
944 MachineFrameInfo *MFI = MF.getFrameInfo();
945 MachineRegisterInfo &RegInfo = MF.getRegInfo();
946 SmallVector<SDValue, 48> ArgValues;
947 SDValue Root = Op.getOperand(0);
948 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
950 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
951 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
// Stack arguments start just above the minimum frame (linkage area).
953 unsigned ArgOffset = SPUFrameInfo::minStackSize();
954 unsigned ArgRegIdx = 0;
955 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
957 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
959 // Add DAG nodes to load the arguments or copy them out of registers.
960 for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
961 ArgNo != e; ++ArgNo) {
962 MVT ObjectVT = Op.getValue(ArgNo).getValueType();
963 unsigned ObjSize = ObjectVT.getSizeInBits()/8;
// While argument registers remain, pick the register class matching the
// argument's value type and copy the physreg into a fresh virtual reg.
966 if (ArgRegIdx < NumArgRegs) {
967 const TargetRegisterClass *ArgRegClass;
969 switch (ObjectVT.getSimpleVT()) {
971 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
972 << ObjectVT.getMVTString()
977 ArgRegClass = &SPU::R8CRegClass;
980 ArgRegClass = &SPU::R16CRegClass;
983 ArgRegClass = &SPU::R32CRegClass;
986 ArgRegClass = &SPU::R64CRegClass;
989 ArgRegClass = &SPU::R32FPRegClass;
992 ArgRegClass = &SPU::R64FPRegClass;
1000 ArgRegClass = &SPU::VECREGRegClass;
1004 unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
1005 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1006 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
1009 // We need to load the argument to a virtual register if we determined
1010 // above that we ran out of physical registers of the appropriate type
1011 // or we're forced to do vararg
1012 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1013 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1014 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
1015 ArgOffset += StackSlotSize;
1018 ArgValues.push_back(ArgVal);
// Chain subsequent copies/loads after this one.
1020 Root = ArgVal.getOperand(0);
1025 // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
1026 // We will spill (79-3)+1 registers to the stack
1027 SmallVector<SDValue, 79-3+1> MemOps;
1029 // Create the frame slot
1031 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1032 VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
1033 SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
// NOTE(review): each arg register is stored as a full v16i8 quadword, and
// VarArgsFrameIndex is overwritten each iteration so it ends up naming
// the last slot created — confirm this matches va_arg's expectations.
1034 SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
1035 SDValue Store = DAG.getStore(Root, ArgVal, FIN, NULL, 0);
1036 Root = Store.getOperand(0);
1037 MemOps.push_back(Store);
1039 // Increment address by stack slot size for the next stored argument
1040 ArgOffset += StackSlotSize;
1042 if (!MemOps.empty())
1043 Root = DAG.getNode(ISD::TokenFactor,MVT::Other,&MemOps[0],MemOps.size());
1046 ArgValues.push_back(Root)
1048 // Return the new list of results.
1049 return DAG.getNode(ISD::MERGE_VALUES, Op.getNode()->getVTList(),
1050 &ArgValues[0], ArgValues.size());
1053 /// isLSAAddress - Return the immediate to use if the specified
1054 /// value is representable as a LSA address.
1055 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1056 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1059 int Addr = C->getZExtValue();
// The address must be word-aligned, and after dropping the two implicit
// zero bits the remaining value must survive a sign-extension from 18
// bits (the shift pair below keeps only the low 18 bits, sign-extended).
1060 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1061 (Addr << 14 >> 14) != Addr)
1062 return 0; // Top 14 bits have to be sext of immediate.
1064 return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
// Lower a CALL node: marshal arguments into SPU argument registers and
// stack slots, select the callee addressing form (PC-relative, A-form, or
// indirect depending on memory model), emit the CALLSEQ_START / flagged
// copy-to-reg / CALL / CALLSEQ_END sequence, and copy any return values
// back out of R3 (and R4 for a split two-register result).
1069 LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1070 CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
1071 SDValue Chain = TheCall->getChain();
1072 SDValue Callee = TheCall->getCallee();
1073 unsigned NumOps = TheCall->getNumArgs();
1074 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1075 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1076 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1078 // Handy pointer type
1079 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1081 // Accumulate how many bytes are to be pushed on the stack, including the
1082 // linkage area, and parameter passing area. According to the SPU ABI,
1083 // we minimally need space for [LR] and [SP]
1084 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1086 // Set up a copy of the stack pointer for use loading and storing any
1087 // arguments that may not fit in the registers available for argument
1089 SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1091 // Figure out which arguments are going to go in registers, and which in
1093 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1094 unsigned ArgRegIdx = 0;
1096 // Keep track of registers passing arguments
1097 std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1098 // And the arguments passed on the stack
1099 SmallVector<SDValue, 8> MemOpChains;
1101 for (unsigned i = 0; i != NumOps; ++i) {
1102 SDValue Arg = TheCall->getArg(i);
1104 // PtrOff will be used to store the current argument to the stack if a
1105 // register cannot be found for it.
1106 SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1107 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
// Each value-type group below does the same thing: pass the argument in
// the next free argument register, otherwise store it to the next slot.
1109 switch (Arg.getValueType().getSimpleVT()) {
1110 default: assert(0 && "Unexpected ValueType for argument!");
1114 if (ArgRegIdx != NumArgRegs) {
1115 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1117 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1118 ArgOffset += StackSlotSize;
1123 if (ArgRegIdx != NumArgRegs) {
1124 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1126 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1127 ArgOffset += StackSlotSize;
1134 if (ArgRegIdx != NumArgRegs) {
1135 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1137 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1138 ArgOffset += StackSlotSize;
1144 // Update number of stack bytes actually used, insert a call sequence start
1145 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1146 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
1149 if (!MemOpChains.empty()) {
1150 // Adjust the stack pointer for the stack arguments.
1151 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1152 &MemOpChains[0], MemOpChains.size());
1155 // Build a sequence of copy-to-reg nodes chained together with token chain
1156 // and flag operands which copy the outgoing args into the appropriate regs.
1158 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1159 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1161 InFlag = Chain.getValue(1);
1164 SmallVector<SDValue, 8> Ops;
1165 unsigned CallOpc = SPUISD::CALL;
1167 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1168 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1169 // node so that legalize doesn't hack it.
1170 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1171 GlobalValue *GV = G->getGlobal();
1172 MVT CalleeVT = Callee.getValueType();
1173 SDValue Zero = DAG.getConstant(0, PtrVT);
1174 SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1176 if (!ST->usingLargeMem()) {
1177 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1178 // style calls, otherwise, external symbols are BRASL calls. This assumes
1179 // that declared/defined symbols are in the same compilation unit and can
1180 // be reached through PC-relative jumps.
1183 // This may be an unsafe assumption for JIT and really large compilation
1185 if (GV->isDeclaration()) {
1186 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1188 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1191 // "Large memory" mode: Turn all calls into indirect calls with a X-form
1193 Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
1195 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1196 Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1197 else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1198 // If this is an absolute destination address that appears to be a legal
1199 // local store address, use the munged value.
1200 Callee = SDValue(Dest, 0);
1203 Ops.push_back(Chain);
1204 Ops.push_back(Callee);
1206 // Add argument registers to the end of the list so that they are known live
1208 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1209 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1210 RegsToPass[i].second.getValueType()));
1212 if (InFlag.getNode())
1213 Ops.push_back(InFlag);
1214 // Returns a chain and a flag for retval copy to use.
1215 Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag),
1216 &Ops[0], Ops.size());
1217 InFlag = Chain.getValue(1);
1219 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
1220 DAG.getIntPtrConstant(0, true), InFlag);
1221 if (TheCall->getValueType(0) != MVT::Other)
1222 InFlag = Chain.getValue(1);
1224 SDValue ResultVals[3];
1225 unsigned NumResults = 0;
1227 // If the call has results, copy the values out of the ret val registers.
1228 switch (TheCall->getValueType(0).getSimpleVT()) {
1229 default: assert(0 && "Unexpected ret value!");
1230 case MVT::Other: break;
// Two i32 result values: the pair is returned in R3/R4.
1232 if (TheCall->getValueType(1) == MVT::i32) {
1233 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1234 ResultVals[0] = Chain.getValue(0);
1235 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1236 Chain.getValue(2)).getValue(1);
1237 ResultVals[1] = Chain.getValue(0);
1240 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1241 ResultVals[0] = Chain.getValue(0);
1246 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1247 ResultVals[0] = Chain.getValue(0);
1252 Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1253 InFlag).getValue(1);
1254 ResultVals[0] = Chain.getValue(0);
1262 Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1263 InFlag).getValue(1);
1264 ResultVals[0] = Chain.getValue(0);
1269 // If the function returns void, just return the chain.
1270 if (NumResults == 0)
1273 // Otherwise, merge everything together with a MERGE_VALUES node.
1274 ResultVals[NumResults++] = Chain;
1275 SDValue Res = DAG.getMergeValues(ResultVals, NumResults);
1276 return Res.getValue(Op.getResNo());
// Lower a RET node: run the return values through the SPU return calling
// convention, copy each into its assigned physical register (chained
// through a glue flag), and finish with SPUISD::RET_FLAG.
1280 LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
1281 SmallVector<CCValAssign, 16> RVLocs;
1282 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1283 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1284 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1285 CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);
1287 // If this is the first return lowered for this function, add the regs to the
1288 // liveout set for the function.
1289 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1290 for (unsigned i = 0; i != RVLocs.size(); ++i)
1291 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1294 SDValue Chain = Op.getOperand(0);
1297 // Copy the result values into the output registers.
1298 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1299 CCValAssign &VA = RVLocs[i];
1300 assert(VA.isRegLoc() && "Can only return in registers!");
// RET operand layout: chain, then (value, signness) pairs — hence i*2+1.
1301 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1302 Flag = Chain.getValue(1);
// When a value was copied out, the glue flag is threaded into RET_FLAG;
// otherwise only the chain is returned.
1306 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1308 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1312 //===----------------------------------------------------------------------===//
1313 // Vector related lowering:
1314 //===----------------------------------------------------------------------===//
// Return the ConstantSDNode shared by every non-undef element of this
// BUILD_VECTOR, or 0 when the elements differ / are not constants /
// are all undef.
1316 static ConstantSDNode *
1317 getVecImm(SDNode *N) {
1318 SDValue OpVal(0, 0);
1320 // Check to see if this buildvec has a single non-undef value in its elements.
1321 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1322 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1323 if (OpVal.getNode() == 0)
1324 OpVal = N->getOperand(i);
// Two distinct non-undef elements: not a splat, fail.
1325 else if (OpVal != N->getOperand(i))
1329 if (OpVal.getNode() != 0) {
1330 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1335 return 0; // All UNDEF: use implicit def.; not Constant node
1338 /// get_vec_i18imm - Test if this vector is a vector filled with the same value
1339 /// and the value fits into an unsigned 18-bit constant, and if so, return the
1341 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1343 if (ConstantSDNode *CN = getVecImm(N)) {
1344 uint64_t Value = CN->getZExtValue();
// i64 splat: the two 32-bit halves are extracted and — NOTE(review): the
// guard comparing upper/lower is not visible here, confirm — the value is
// reduced to one half before the 18-bit range test below.
1345 if (ValueType == MVT::i64) {
1346 uint64_t UValue = CN->getZExtValue();
1347 uint32_t upper = uint32_t(UValue >> 32);
1348 uint32_t lower = uint32_t(UValue);
1351 Value = Value >> 32;
1353 if (Value <= 0x3ffff)
1354 return DAG.getTargetConstant(Value, ValueType);
1360 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1361 /// and the value fits into a signed 16-bit constant, and if so, return the
1363 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1365 if (ConstantSDNode *CN = getVecImm(N)) {
1366 int64_t Value = CN->getSExtValue();
// i64 splat: both 32-bit halves are extracted (the upper/lower guard is
// on lines not shown here); the value is shifted down to one half before
// the signed 16-bit range check.
1367 if (ValueType == MVT::i64) {
1368 uint64_t UValue = CN->getZExtValue();
1369 uint32_t upper = uint32_t(UValue >> 32);
1370 uint32_t lower = uint32_t(UValue);
1373 Value = Value >> 32;
// Accept values in [-2^15, 2^15).
1375 if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1376 return DAG.getTargetConstant(Value, ValueType);
1383 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1384 /// and the value fits into a signed 10-bit constant, and if so, return the
1386 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1388 if (ConstantSDNode *CN = getVecImm(N)) {
1389 int64_t Value = CN->getSExtValue();
// i64 splat: reduce to one 32-bit half (upper/lower guard on elided
// lines) before the 10-bit range test via isS10Constant.
1390 if (ValueType == MVT::i64) {
1391 uint64_t UValue = CN->getZExtValue();
1392 uint32_t upper = uint32_t(UValue >> 32);
1393 uint32_t lower = uint32_t(UValue);
1396 Value = Value >> 32;
1398 if (isS10Constant(Value))
1399 return DAG.getTargetConstant(Value, ValueType);
1405 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1406 /// and the value fits into a signed 8-bit constant, and if so, return the
1409 /// @note: The incoming vector is v16i8 because that's the only way we can load
1410 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
1412 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1414 if (ConstantSDNode *CN = getVecImm(N)) {
1415 int Value = (int) CN->getZExtValue();
// i16 splat: both bytes of the halfword must agree before the low byte
// can serve as the 8-bit immediate.
1416 if (ValueType == MVT::i16
1417 && Value <= 0xffff /* truncated from uint64_t */
1418 && ((short) Value >> 8) == ((short) Value & 0xff))
1419 return DAG.getTargetConstant(Value & 0xff, ValueType);
// i8 splat: the value must already fit in one byte.
1420 else if (ValueType == MVT::i8
1421 && (Value & 0xff) == Value)
1422 return DAG.getTargetConstant(Value, ValueType);
1428 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1429 /// and the value fits into a signed 16-bit constant, and if so, return the
1431 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1433 if (ConstantSDNode *CN = getVecImm(N)) {
1434 uint64_t Value = CN->getZExtValue();
// The splat is usable only when all bits outside the upper halfword of
// the 32-bit word are clear; the immediate is then that halfword.
1435 if ((ValueType == MVT::i32
1436 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1437 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1438 return DAG.getTargetConstant(Value >> 16, ValueType);
1444 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1445 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
// Any uniform constant splat is emitted directly as a 32-bit immediate.
1446 if (ConstantSDNode *CN = getVecImm(N)) {
1447 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
1453 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1454 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1455 if (ConstantSDNode *CN = getVecImm(N)) {
1456 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
1462 // If this is a vector of constants or undefs, get the bits. A bit in
1463 // UndefBits is set if the corresponding element of the vector is an
1464 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1465 // zero. Return true if this is not an array of constants, false if it is.
1467 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1468 uint64_t UndefBits[2]) {
1469 // Start with zero'd results.
1470 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1472 unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
1473 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1474 SDValue OpVal = BV->getOperand(i);
1476 unsigned PartNo = i >= e/2; // In the upper 128 bits?
1477 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1479 uint64_t EltBits = 0;
1480 if (OpVal.getOpcode() == ISD::UNDEF) {
// Mark every bit position of this element as undef.
1481 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1482 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1484 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1485 EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
// FP elements contribute their raw IEEE bit patterns.
1486 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1487 const APFloat &apf = CN->getValueAPF();
1488 EltBits = (CN->getValueType(0) == MVT::f32
1489 ? FloatToBits(apf.convertToFloat())
1490 : DoubleToBits(apf.convertToDouble()));
1492 // Nonconstant element.
1496 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1499 //printf("%llx %llx %llx %llx\n",
1500 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1504 /// If this is a splat (repetition) of a value across the whole vector, return
1505 /// the smallest size that splats it. For example, "0x01010101010101..." is a
1506 /// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1507 /// SplatSize = 1 byte.
1508 static bool isConstantSplat(const uint64_t Bits128[2],
1509 const uint64_t Undef128[2],
1511 uint64_t &SplatBits, uint64_t &SplatUndef,
1513 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1514 // the same as the lower 64-bits, ignoring undefs.
// Fold each half down, OR-ing value bits (undef slots read as 0) and
// AND-ing undef masks, so progressively narrower splats can be tested.
1515 uint64_t Bits64 = Bits128[0] | Bits128[1];
1516 uint64_t Undef64 = Undef128[0] & Undef128[1];
1517 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1518 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1519 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1520 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
1522 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1523 if (MinSplatBits < 64) {
1525 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1527 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1528 if (MinSplatBits < 32) {
1530 // If the top 16-bits are different than the lower 16-bits, ignoring
1531 // undefs, we have an i32 splat.
1532 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1533 if (MinSplatBits < 16) {
1534 // If the top 8-bits are different than the lower 8-bits, ignoring
1535 // undefs, we have an i16 splat.
1536 if ((Bits16 & (uint16_t(~Undef16) >> 8))
1537 == ((Bits16 >> 8) & ~Undef16)) {
1538 // Otherwise, we have an 8-bit splat.
1539 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1540 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
1546 SplatUndef = Undef16;
1553 SplatUndef = Undef32;
// 64-bit splat: report the lower quadword as-is.
1559 SplatBits = Bits128[0];
1560 SplatUndef = Undef128[0];
1566 return false; // Can't be a splat if two pieces don't match.
1569 // If this is a case we can't handle, return null and let the default
1570 // expansion code take care of it. If we CAN select this case, and if it
1571 // selects to a single instruction, return Op. Otherwise, if we can codegen
1572 // this case more efficiently than a constant pool load, lower it to the
1573 // sequence of ops that should be used.
1574 static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1575 MVT VT = Op.getValueType();
1576 // If this is a vector of constants or undefs, get the bits. A bit in
1577 // UndefBits is set if the corresponding element of the vector is an
1578 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1580 uint64_t VectorBits[2];
1581 uint64_t UndefBits[2];
1582 uint64_t SplatBits, SplatUndef;
1584 if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
1585 || !isConstantSplat(VectorBits, UndefBits,
1586 VT.getVectorElementType().getSizeInBits(),
1587 SplatBits, SplatUndef, SplatSize))
1588 return SDValue(); // Not a constant vector, not a splat.
1590 switch (VT.getSimpleVT()) {
// v4f32: splat the 32-bit pattern as integers, then bitcast to float.
1593 uint32_t Value32 = SplatBits;
1594 assert(SplatSize == 4
1595 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1596 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1597 SDValue T = DAG.getConstant(Value32, MVT::i32);
1598 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1599 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
// v2f64: same integer-splat-plus-bitcast trick with 64-bit words.
1603 uint64_t f64val = SplatBits;
1604 assert(SplatSize == 8
1605 && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
1606 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1607 SDValue T = DAG.getConstant(f64val, MVT::i64);
1608 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1609 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1613 // 8-bit constants have to be expanded to 16-bits
1614 unsigned short Value16 = SplatBits | (SplatBits << 8);
1616 for (int i = 0; i < 8; ++i)
1617 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1618 return DAG.getNode(ISD::BIT_CONVERT, VT,
1619 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
// v8i16: widen an 8-bit splat into a full halfword when necessary.
1622 unsigned short Value16;
1624 Value16 = (unsigned short) (SplatBits & 0xffff);
1626 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1627 SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
1629 for (int i = 0; i < 8; ++i) Ops[i] = T;
1630 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1633 unsigned int Value = SplatBits;
1634 SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
1635 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
// v2i64: strategy depends on the relationship of the two 32-bit words.
1638 uint64_t val = SplatBits;
1639 uint32_t upper = uint32_t(val >> 32);
1640 uint32_t lower = uint32_t(val);
1642 if (upper == lower) {
1643 // Magic constant that can be matched by IL, ILA, et. al.
1644 SDValue Val = DAG.getTargetConstant(val, MVT::i64);
1645 return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
1649 SmallVector<SDValue, 16> ShufBytes;
1651 bool upper_special, lower_special;
1653 // NOTE: This code creates common-case shuffle masks that can be easily
1654 // detected as common expressions. It is not attempting to create highly
1655 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1657 // Detect if the upper or lower half is a special shuffle mask pattern:
1658 upper_special = (upper == 0||upper == 0xffffffff||upper == 0x80000000);
1659 lower_special = (lower == 0||lower == 0xffffffff||lower == 0x80000000);
1661 // Create lower vector if not a special pattern
1662 if (!lower_special) {
1663 SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1664 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1665 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1666 LO32C, LO32C, LO32C, LO32C));
1669 // Create upper vector if not a special pattern
1670 if (!upper_special) {
1671 SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1672 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1673 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1674 HI32C, HI32C, HI32C, HI32C));
1677 // If either upper or lower are special, then the two input operands are
1678 // the same (basically, one of them is a "don't care")
1683 if (lower_special && upper_special) {
1684 // Unhappy situation... both upper and lower are special, so punt with
1685 // a target constant:
1686 SDValue Zero = DAG.getConstant(0, MVT::i32);
1687 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
// Build the 16 shuffle-control bytes (packed 4 per i32 word). Ordinary
// bytes select a source byte (i*4+j, with odd words redirected to the
// second operand); "special" halves are presumably synthesized via
// shufb's constant-generating selector bytes (0xe0/0x80 seen below for
// the 0x80000000 case — the 0 and 0xffffffff selector values are on
// lines not visible here; confirm).
1691 for (int i = 0; i < 4; ++i) {
1693 for (int j = 0; j < 4; ++j) {
1695 bool process_upper, process_lower;
1697 process_upper = (upper_special && (i & 1) == 0);
1698 process_lower = (lower_special && (i & 1) == 1);
1700 if (process_upper || process_lower) {
1701 if ((process_upper && upper == 0)
1702 || (process_lower && lower == 0))
1704 else if ((process_upper && upper == 0xffffffff)
1705 || (process_lower && lower == 0xffffffff))
1707 else if ((process_upper && upper == 0x80000000)
1708 || (process_lower && lower == 0x80000000))
1709 val |= (j == 0 ? 0xe0 : 0x80);
1711 val |= i * 4 + j + ((i & 1) * 16);
1714 ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1717 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1718 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1719 &ShufBytes[0], ShufBytes.size()));
1727 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1728 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1729 /// permutation vector, V3, is monotonically increasing with one "exception"
1730 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1731 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1732 /// In either case, the net result is going to eventually invoke SHUFB to
1733 /// permute/shuffle the bytes from V1 and V2.
1735 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate
1736 /// control word for byte/halfword/word insertion. This takes care of a single
1737 /// element move from V2 into V1.
1739 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1740 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1741 SDValue V1 = Op.getOperand(0);
1742 SDValue V2 = Op.getOperand(1);
1743 SDValue PermMask = Op.getOperand(2);
// An undef second source degenerates to shuffling V1 with itself.
1745 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1747 // If we have a single element being moved from V1 to V2, this can be handled
1748 // using the C*[DX] compute mask instructions, but the vector elements have
1749 // to be monotonically increasing with one exception element.
1750 MVT EltVT = V1.getValueType().getVectorElementType();
1751 unsigned EltsFromV2 = 0;
1753 unsigned V2EltIdx0 = 0;
1754 unsigned CurrElt = 0;
1755 bool monotonic = true;
// V2EltIdx0 becomes the index of the first mask value that refers to V2
// (the per-type assignments are on lines not visible here — presumably
// 16 / 8 / 4 for i8 / i16 / i32 elements; confirm).
1756 if (EltVT == MVT::i8)
1758 else if (EltVT == MVT::i16)
1760 else if (EltVT == MVT::i32)
1763 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
// Scan the mask; stop early once it is non-monotonic or pulls more than
// one element from V2.
1765 for (unsigned i = 0, e = PermMask.getNumOperands();
1766 EltsFromV2 <= 1 && monotonic && i != e;
1769 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1772 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1774 if (SrcElt >= V2EltIdx0) {
1776 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1777 } else if (CurrElt != SrcElt) {
1784 if (EltsFromV2 == 1 && monotonic) {
1785 // Compute mask and shuffle
1786 MachineFunction &MF = DAG.getMachineFunction();
1787 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1788 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1789 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1790 // Initialize temporary register to 0
1791 SDValue InitTempReg =
1792 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1793 // Copy register's contents as index in SHUFFLE_MASK:
1794 SDValue ShufMaskOp =
1795 DAG.getNode(SPUISD::SHUFFLE_MASK, MVT::v4i32,
1796 DAG.getTargetConstant(V2Elt, MVT::i32),
1797 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1798 // Use shuffle mask in SHUFB synthetic instruction:
1799 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
// General case: expand the element-granular mask into a 16-entry byte
// mask and emit a SHUFB directly.
1801 // Convert the SHUFFLE_VECTOR mask's input element units to the
1803 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1805 SmallVector<SDValue, 16> ResultMask;
1806 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1808 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1811 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
// Expand each element index into its constituent byte indices.
1813 for (unsigned j = 0; j < BytesPerElement; ++j) {
1814 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1819 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1820 &ResultMask[0], ResultMask.size());
1821 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
// Lower SCALAR_TO_VECTOR: constants are turned into an equivalent
// BUILD_VECTOR splat; non-constant scalars are promoted into a vector
// register via SPUISD::PROMOTE_SCALAR.
1825 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1826 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1828 if (Op0.getNode()->getOpcode() == ISD::Constant) {
1829 // For a constant, build the appropriate constant vector, which will
1830 // eventually simplify to a vector register load.
1832 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1833 SmallVector<SDValue, 16> ConstVecValues;
1837 // Create a constant vector:
1838 switch (Op.getValueType().getSimpleVT()) {
1839 default: assert(0 && "Unexpected constant value type in "
1840 "LowerSCALAR_TO_VECTOR");
1841 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1842 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1843 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1844 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1845 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1846 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
// Replicate the scalar constant across every vector lane.
1849 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1850 for (size_t j = 0; j < n_copies; ++j)
1851 ConstVecValues.push_back(CValue);
1853 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1854 &ConstVecValues[0], ConstVecValues.size());
1856 // Otherwise, copy the value from one register to another:
1857 switch (Op0.getValueType().getSimpleVT()) {
1858 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1865 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
// Lower vector multiplication. SPU has no full-width vector multiply, so
// each element type is synthesized from the 16x16->32 multiply primitives
// (MPY/MPYU/MPYH/MPYHH) plus shifts and SELB merges.
1872 static SDValue LowerVectorMUL(SDValue Op, SelectionDAG &DAG) {
1873 switch (Op.getValueType().getSimpleVT()) {
1875 cerr << "CellSPU: Unknown vector multiplication, got "
1876 << Op.getValueType().getMVTString()
// v4i32: classic decomposition — low product plus both cross high
// products: rA*rB = MPYU(rA,rB) + MPYH(rA,rB) + MPYH(rB,rA).
1882 SDValue rA = Op.getOperand(0);
1883 SDValue rB = Op.getOperand(1);
1884 SDValue HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1885 SDValue HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1886 SDValue LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1887 SDValue Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1889 return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1893 // Multiply two v8i16 vectors (pipeline friendly version):
1894 // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1895 // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1896 // c) Use SELB to select upper and lower halves from the intermediate results
1898 // NOTE: We really want to move the SELECT_MASK to earlier to actually get the
1899 // dual-issue. This code does manage to do this, even if it's a little on
1902 MachineFunction &MF = DAG.getMachineFunction();
1903 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1904 SDValue Chain = Op.getOperand(0);
// NOTE(review): Chain and rA both read operand 0 — for a MUL node
// operand 0 is the first multiplicand, not a chain; confirm this
// aliasing is intentional.
1905 SDValue rA = Op.getOperand(0);
1906 SDValue rB = Op.getOperand(1);
1907 unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1908 unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
// Pin the 0xcccc select mask in a vreg so it can issue early.
1911 DAG.getCopyToReg(Chain, FSMBIreg,
1912 DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1913 DAG.getConstant(0xcccc, MVT::i16)));
1916 DAG.getCopyToReg(FSMBOp, HiProdReg,
1917 DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1919 SDValue HHProd_v4i32 =
1920 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1921 DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
// Merge: low halfword products selected against the shifted high
// products using the 0xcccc mask.
1923 return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1924 DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1925 DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1926 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1928 DAG.getConstant(16, MVT::i16))),
1929 DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1932 // This M00sE is N@stI! (apologies to Monty Python)
1934 // SPU doesn't know how to do any 8-bit multiplication, so the solution
1935 // is to break it all apart, sign extend, and reassemble the various
1936 // intermediate products.
1938 SDValue rA = Op.getOperand(0);
1939 SDValue rB = Op.getOperand(1);
1940 SDValue c8 = DAG.getConstant(8, MVT::i32);
1941 SDValue c16 = DAG.getConstant(16, MVT::i32);
// Products of the low bytes of each halfword pair.
1944 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1945 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1946 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
// Sign-extend the high bytes down via arithmetic shifts, multiply, then
// shift the product back up into the high byte position.
1948 SDValue rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1950 SDValue rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1953 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1954 DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1956 SDValue FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1957 DAG.getConstant(0x2222, MVT::i16));
// Interleave low/high byte products, then mask to the low halfwords.
1959 SDValue LoProdParts =
1960 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1961 DAG.getNode(SPUISD::SELB, MVT::v8i16,
1962 LLProd, LHProd, FSMBmask));
1964 SDValue LoProdMask = DAG.getConstant(0xffff, MVT::i32);
1967 DAG.getNode(ISD::AND, MVT::v4i32,
1969 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1970 LoProdMask, LoProdMask,
1971 LoProdMask, LoProdMask));
// Repeat the whole dance for the upper halfwords of each word.
1974 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1975 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
1978 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1979 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
1982 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1983 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
1984 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
1987 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1988 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1989 DAG.getNode(SPUISD::VEC_SRA,
1990 MVT::v4i32, rAH, c8)),
1991 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1992 DAG.getNode(SPUISD::VEC_SRA,
1993 MVT::v4i32, rBH, c8)));
1996 DAG.getNode(SPUISD::SELB, MVT::v8i16,
1998 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
// Shift the high-half products into place and OR with the low halves.
2002 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);
2004 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2005 DAG.getNode(ISD::OR, MVT::v4i32,
//! Lower ISD::FDIV for f32 (and, presumably, v4f32 — see the VECREG branch).
/*!
 Computes A/B without a hardware divide: SPU's FP reciprocal estimate
 (FPRecipEst) refined by FPInterp gives 1/B, then the result is
 A*Brcpl + Brcpl*(A - B*(A*Brcpl)) — one Newton-Raphson correction step.
 NOTE(review): some lines are elided in this view (e.g. the else-brace and
 the declarations of BRcpl/AxBRcpl); comments describe only visible code.
 */
static SDValue LowerFDIVf32(SDValue Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  SDValue A = Op.getOperand(0);   // dividend
  SDValue B = Op.getOperand(1);   // divisor
  MVT VT = Op.getValueType();
  // VRegBR holds the refined reciprocal of B; VRegC holds A * (1/B).
  unsigned VRegBR, VRegC;
  if (VT == MVT::f32) {
    VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
    VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
    // (vector case — full 128-bit vector register class; else-branch brace elided)
    VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
    VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
  // TODO: make sure we're feeding FPInterp the right arguments
  // Right now: fi B, frest(B)
  // (Floating Interpolate (FP Reciprocal Estimate B))
    DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
                     DAG.getNode(SPUISD::FPInterp, VT, B,
                                 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
  // Computes A * BRcpl and stores in a temporary register
    DAG.getCopyToReg(BRcpl, VRegC,
                     DAG.getNode(ISD::FMUL, VT, A,
                                 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
  // What's the Chain variable do? It's magic!
  // TODO: set Chain = Op(0).getEntryNode()
  // Final answer: A*Brcpl plus the Newton-Raphson correction term
  // Brcpl*(A - B*(A*Brcpl)).
  return DAG.getNode(ISD::FADD, VT,
                     DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
                     DAG.getNode(ISD::FMUL, VT,
                                 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
                                 DAG.getNode(ISD::FSUB, VT, A,
                                             DAG.getNode(ISD::FMUL, VT, B,
                                                         DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
2057 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2058 MVT VT = Op.getValueType();
2059 SDValue N = Op.getOperand(0);
2060 SDValue Elt = Op.getOperand(1);
2063 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
2064 // Constant argument:
2065 int EltNo = (int) C->getZExtValue();
2068 if (VT == MVT::i8 && EltNo >= 16)
2069 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2070 else if (VT == MVT::i16 && EltNo >= 8)
2071 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2072 else if (VT == MVT::i32 && EltNo >= 4)
2073 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
2074 else if (VT == MVT::i64 && EltNo >= 2)
2075 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
2077 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2078 // i32 and i64: Element 0 is the preferred slot
2079 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, N);
2082 // Need to generate shuffle mask and extract:
2083 int prefslot_begin = -1, prefslot_end = -1;
2084 int elt_byte = EltNo * VT.getSizeInBits() / 8;
2086 switch (VT.getSimpleVT()) {
2088 assert(false && "Invalid value type!");
2090 prefslot_begin = prefslot_end = 3;
2094 prefslot_begin = 2; prefslot_end = 3;
2099 prefslot_begin = 0; prefslot_end = 3;
2104 prefslot_begin = 0; prefslot_end = 7;
2109 assert(prefslot_begin != -1 && prefslot_end != -1 &&
2110 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
2112 unsigned int ShufBytes[16];
2113 for (int i = 0; i < 16; ++i) {
2114 // zero fill uppper part of preferred slot, don't care about the
2116 unsigned int mask_val;
2117 if (i <= prefslot_end) {
2119 ((i < prefslot_begin)
2121 : elt_byte + (i - prefslot_begin));
2123 ShufBytes[i] = mask_val;
2125 ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
2128 SDValue ShufMask[4];
2129 for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
2130 unsigned bidx = i / 4;
2131 unsigned int bits = ((ShufBytes[bidx] << 24) |
2132 (ShufBytes[bidx+1] << 16) |
2133 (ShufBytes[bidx+2] << 8) |
2135 ShufMask[i] = DAG.getConstant(bits, MVT::i32);
2138 SDValue ShufMaskVec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2140 sizeof(ShufMask) / sizeof(ShufMask[0]));
2142 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2143 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2144 N, N, ShufMaskVec));
2146 // Variable index: Rotate the requested element into slot 0, then replicate
2147 // slot 0 across the vector
2148 MVT VecVT = N.getValueType();
2149 if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
2150 cerr << "LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit vector type!\n";
2154 // Make life easier by making sure the index is zero-extended to i32
2155 if (Elt.getValueType() != MVT::i32)
2156 Elt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Elt);
2158 // Scale the index to a bit/byte shift quantity
2160 APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
2161 unsigned scaleShift = scaleFactor.logBase2();
2164 if (scaleShift > 0) {
2165 // Scale the shift factor:
2166 Elt = DAG.getNode(ISD::SHL, MVT::i32, Elt,
2167 DAG.getConstant(scaleShift, MVT::i32));
2170 vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT, N, Elt);
2172 // Replicate the bytes starting at byte 0 across the entire vector (for
2173 // consistency with the notion of a unified register set)
2176 switch (VT.getSimpleVT()) {
2178 cerr << "LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector type\n";
2182 SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
2183 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2188 SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2189 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2195 SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2196 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2202 SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2203 SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2204 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, loFactor, hiFactor,
2205 loFactor, hiFactor);
2210 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2211 DAG.getNode(SPUISD::SHUFB, VecVT,
2212 vecShift, vecShift, replicate));
//! Lower ISD::INSERT_VECTOR_ELT (constant index only)
/*!
 Materializes a SHUFFLE_MASK keyed off a stack-relative address and uses
 SHUFB to merge the scalar (via SCALAR_TO_VECTOR) into the vector.
 Only constant indices are supported — the cast<> below asserts otherwise.
 NOTE(review): the SHUFB's second value operand (presumably VecOp) is on an
 elided line in this view.
 */
static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
  SDValue VecOp = Op.getOperand(0);   // destination vector
  SDValue ValOp = Op.getOperand(1);   // scalar to insert
  SDValue IdxOp = Op.getOperand(2);   // element index (must be constant)
  MVT VT = Op.getValueType();
  ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
  assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  // Use $sp ($1) because it's always 16-byte aligned and it's available:
  SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
                                DAG.getRegister(SPU::R1, PtrVT),
                                DAG.getConstant(CN->getSExtValue(), PtrVT));
  SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, VT, Pointer);
    DAG.getNode(SPUISD::SHUFB, VT,
                DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
                DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, ShufMask));
//! Lower i8 arithmetic by promoting to i16
/*!
 \param Op  the i8 operation to lower
 \param DAG the current DAG
 \param Opc the i16 opcode applied after promotion
 SPU has no 8-bit ALU forms, so each arm widens the operands to i16
 (sign- or zero-extended as appropriate), performs the operation at 16
 bits, and truncates the result back to i8.
 NOTE(review): the switch's case labels are elided in this view; the arm
 descriptions below are inferred from extension kinds — confirm upstream.
 */
static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
  SDValue N0 = Op.getOperand(0);      // Everything has at least one operand
  assert(Op.getValueType() == MVT::i8);
    assert(0 && "Unhandled i8 math operator");
    // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
    SDValue N1 = Op.getOperand(1);
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
    // Presumably the rotate arm: N0 is duplicated into the high byte
    // (ExpandArg) so an 8-bit rotate wraps correctly within 16 bits.
    SDValue N1 = Op.getOperand(1);
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
    N1Opc = N1.getValueType().bitsLT(MVT::i32)
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i32, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
      DAG.getNode(ISD::OR, MVT::i16, N0,
                  DAG.getNode(ISD::SHL, MVT::i16,
                              N0, DAG.getConstant(8, MVT::i32)));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
    // Zero-extended arm (presumably logical shift):
    SDValue N1 = Op.getOperand(1);
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
    N1Opc = N1.getValueType().bitsLT(MVT::i16)
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
    // Sign-extended arm (presumably arithmetic shift):
    SDValue N1 = Op.getOperand(1);
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
    N1Opc = N1.getValueType().bitsLT(MVT::i16)
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
    // Sign-extended arm (presumably multiply):
    SDValue N1 = Op.getOperand(1);
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
    N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
//! Lower i64 operations by operating on v2i64 quadword vectors
/*!
 \param Op  the i64 operation to lower
 \param DAG the current DAG
 \param Opc the operation's opcode
 Each arm promotes the scalar operand(s) into a 128-bit vector
 (PROMOTE_SCALAR), performs the work with SPU vector/quadword nodes, and
 extracts the scalar result with VEC2PREFSLOT.
 NOTE(review): most case labels and several declarations are elided in
 this view; arm descriptions are inferred from the node kinds used
 (CARRY_GENERATE/ADD_EXTENDED => add, BORROW_GENERATE/SUB_EXTENDED => sub,
 SHLQUAD => shl, ROTQUAD_RZ => srl, ROTBYTES/sign handling => sra).
 */
static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
  MVT VT = Op.getValueType();
  MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
  SDValue Op0 = Op.getOperand(0);
  case ISD::ZERO_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::ANY_EXTEND: {
    MVT Op0VT = Op0.getValueType();
    MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
    assert(Op0VT == MVT::i32
           && "CellSPU: Zero/sign extending something other than i32");
    DEBUG(cerr << "CellSPU.LowerI64Math: lowering zero/sign/any extend\n");
    SDValue PromoteScalar =
      DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
    if (Opc != ISD::SIGN_EXTEND) {
      // Use a shuffle to zero extend the i32 to i64 directly:
      // (0x80 control bytes produce zero bytes in the SHUFB result.)
        DAG.getNode(ISD::BUILD_VECTOR, Op0VecVT,
                    DAG.getConstant(0x80808080, MVT::i32),
                    DAG.getConstant(0x00010203, MVT::i32),
                    DAG.getConstant(0x80808080, MVT::i32),
                    DAG.getConstant(0x08090a0b, MVT::i32));
      SDValue zextShuffle =
        DAG.getNode(SPUISD::SHUFB, Op0VecVT,
                    PromoteScalar, PromoteScalar, shufMask);
      return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
                         DAG.getNode(ISD::BIT_CONVERT, VecVT, zextShuffle));
      // SPU has no "rotate quadword and replicate bit 0" (i.e. rotate/shift
      // right and propagate the sign bit) instruction.
      // Build the sign-extended i64 by combining a rotated copy of the low
      // word with a quadword full of sign bits, selected by a 0xf0f0 mask.
        DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, Op0VecVT,
                    PromoteScalar, DAG.getConstant(4, MVT::i32));
        DAG.getNode(SPUISD::VEC_SRA, Op0VecVT,
                    PromoteScalar, DAG.getConstant(32, MVT::i32));
        DAG.getNode(SPUISD::SELECT_MASK, Op0VecVT,
                    DAG.getConstant(0xf0f0, MVT::i16));
      SDValue CombineQuad =
        DAG.getNode(SPUISD::SELB, Op0VecVT,
                    SignQuad, RotQuad, SelMask);
      return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
                         DAG.getNode(ISD::BIT_CONVERT, VecVT, CombineQuad));
    // --- i64 add arm (label elided) ---
    // Turn operands into vectors to satisfy type checking (shufb works on
      DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
      DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
    SmallVector<SDValue, 16> ShufBytes;
    // Create the shuffle mask for "rotating" the borrow up one register slot
    // once the borrow is generated.
    ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
      DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
    SDValue ShiftedCarry =
      DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
                  DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                              &ShufBytes[0], ShufBytes.size()));
    return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
                       DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
                                   Op0, Op1, ShiftedCarry));
    // --- i64 subtract arm (label elided) ---
    // Turn operands into vectors to satisfy type checking (shufb works on
      DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
      DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
    SmallVector<SDValue, 16> ShufBytes;
    // Create the shuffle mask for "rotating" the borrow up one register slot
    // once the borrow is generated.
    ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
      DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
    SDValue ShiftedBorrow =
      DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
                  BorrowGen, BorrowGen,
                  DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                              &ShufBytes[0], ShufBytes.size()));
    return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
                       DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
                                   Op0, Op1, ShiftedBorrow));
    // --- i64 shift-left arm (label elided) ---
    SDValue ShiftAmt = Op.getOperand(1);
    MVT ShiftAmtVT = ShiftAmt.getValueType();
    SDValue Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0);
    // Zero the bytes below the value so shifted-in garbage can't leak in.
      DAG.getNode(SPUISD::SELB, VecVT,
                  DAG.getConstant(0, VecVT),
                  DAG.getNode(SPUISD::SELECT_MASK, VecVT,
                              DAG.getConstant(0xff00ULL, MVT::i16)));
    // Split the shift amount into whole bytes (amt >> 3) ...
    SDValue ShiftAmtBytes =
      DAG.getNode(ISD::SRL, ShiftAmtVT,
                  DAG.getConstant(3, ShiftAmtVT));
    // ... and residual bits (amt & 7).
    SDValue ShiftAmtBits =
      DAG.getNode(ISD::AND, ShiftAmtVT,
                  DAG.getConstant(7, ShiftAmtVT));
    return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
                       DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
                                   DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
                                               MaskLower, ShiftAmtBytes),
    // --- i64 logical shift-right arm (label elided) ---
    MVT VT = Op.getValueType();
    SDValue ShiftAmt = Op.getOperand(1);
    MVT ShiftAmtVT = ShiftAmt.getValueType();
    SDValue ShiftAmtBytes =
      DAG.getNode(ISD::SRL, ShiftAmtVT,
                  DAG.getConstant(3, ShiftAmtVT));
    SDValue ShiftAmtBits =
      DAG.getNode(ISD::AND, ShiftAmtVT,
                  DAG.getConstant(7, ShiftAmtVT));
    return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
                       DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
                                   Op0, ShiftAmtBytes),
    // --- i64 arithmetic shift-right arm (label elided) ---
    // Promote Op0 to vector
      DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
    SDValue ShiftAmt = Op.getOperand(1);
    MVT ShiftVT = ShiftAmt.getValueType();
    // Negate variable shift amounts
    if (!isa<ConstantSDNode>(ShiftAmt)) {
      ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT,
                             DAG.getConstant(0, ShiftVT), ShiftAmt);
    // Extract the sign word so the shifted-in bits replicate the sign.
    SDValue UpperHalfSign =
      DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i32,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
                              DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
                                          Op0, DAG.getConstant(31, MVT::i32))));
    SDValue UpperHalfSignMask =
      DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign);
    SDValue UpperLowerMask =
      DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64,
                  DAG.getConstant(0xff00, MVT::i16));
    SDValue UpperLowerSelect =
      DAG.getNode(SPUISD::SELB, MVT::v2i64,
                  UpperHalfSignMask, Op0, UpperLowerMask);
    // Rotate left by the negated amount: bytes first, then residual bits.
    SDValue RotateLeftBytes =
      DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64,
                  UpperLowerSelect, ShiftAmt);
    SDValue RotateLeftBits =
      DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
                  RotateLeftBytes, ShiftAmt);
    return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
//! Lower byte immediate operations for v16i8 vectors:
/*!
 Detects an AND/OR/XOR of a v16i8 value with a splatted constant vector
 (possibly hidden behind a BIT_CONVERT, on either operand) and rewrites
 the constant side as a BUILD_VECTOR of i8 target constants, enabling the
 byte-immediate instruction forms to be selected.
 */
LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  ConstVec = Op.getOperand(0);
  Arg = Op.getOperand(1);
  // Normalize so ConstVec is the BUILD_VECTOR candidate and Arg is the
  // non-constant operand, peeling one BIT_CONVERT if present:
  if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
    if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
      ConstVec = ConstVec.getOperand(0);
      // (else-branch: constant vector is the second operand)
      ConstVec = Op.getOperand(1);
      Arg = Op.getOperand(0);
      if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
        ConstVec = ConstVec.getOperand(0);
  if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
    uint64_t VectorBits[2];
    uint64_t UndefBits[2];
    uint64_t SplatBits, SplatUndef;
    // Only rewrite when the vector is a uniform splat:
    if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
        && isConstantSplat(VectorBits, UndefBits,
                           VT.getVectorElementType().getSizeInBits(),
                           SplatBits, SplatUndef, SplatSize)) {
      SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
      const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
      // Turn the BUILD_VECTOR into a set of target constants:
      for (size_t i = 0; i < tcVecSize; ++i)
      return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
                         DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
  // These operations (AND, OR, XOR) are legal, they just couldn't be custom
  // lowered. Return the operation, rather than a null SDValue.
//! Lower i32 multiplication
/*!
 Expands a 32-bit multiply into SPU's 16-bit multiply primitives:
 rA*rB = (mpyh(rA,rB) + mpyh(rB,rA)) + mpyu(rA,rB) — the two cross
 high-half partial products plus the unsigned low-half product.
 */
static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG, MVT VT,
  switch (VT.getSimpleVT()) {
    // Unknown type: report it (default/case labels elided in this view).
    cerr << "CellSPU: Unknown LowerMUL value type, got "
         << Op.getValueType().getMVTString()
    SDValue rA = Op.getOperand(0);
    SDValue rB = Op.getOperand(1);
    // Sum the two high partial products, then add the unsigned low product:
    return DAG.getNode(ISD::ADD, MVT::i32,
                       DAG.getNode(ISD::ADD, MVT::i32,
                                   DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
                                   DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
                       DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
//! Custom lowering for CTPOP (count population)
/*!
  Custom lowering code that counts the number ones in the input
  operand. SPU has such an instruction, but it counts the number of
  ones per byte, which then have to be accumulated.
  (Case labels for the i8/i16/i32 arms are elided in this view.)
*/
static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
  switch (VT.getSimpleVT()) {
    assert(false && "Invalid value type!");
    // i8 arm: a single CNTB byte already holds the answer.
    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i32);
    SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
    // i16 arm: add the two per-byte counts, mask to 4 bits.
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i16);
    SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
    SDValue Shift1 = DAG.getConstant(8, MVT::i32);
    SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDValue CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
    SDValue CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
    SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
    // (count + (count >> 8)) & 0x0f
    return DAG.getNode(ISD::AND, MVT::i16,
                       DAG.getNode(ISD::ADD, MVT::i16,
                                   DAG.getNode(ISD::SRL, MVT::i16,
    // i32 arm: two rounds of shift-and-add fold the four per-byte counts.
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
    unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i32);
    SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
    SDValue Shift1 = DAG.getConstant(16, MVT::i32);
    SDValue Shift2 = DAG.getConstant(8, MVT::i32);
    SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDValue CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
    SDValue CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
    // Sum1 = count + (count >> 16)
      DAG.getNode(ISD::SRL, MVT::i32,
                  DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
      DAG.getNode(ISD::ADD, MVT::i32,
                  Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
    SDValue Sum1_rescopy =
      DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
    // Sum2 = Sum1 + (Sum1 >> 8); final result masked to the low byte.
      DAG.getNode(ISD::SRL, MVT::i32,
                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
      DAG.getNode(ISD::ADD, MVT::i32, Comp2,
                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
    return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
//! Lower ISD::SELECT_CC
/*!
  ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
  \note Need to revisit this in the future: if the code path through the true
  and false value computations is longer than the latency of a branch (6
  cycles), then it would be more advantageous to branch and insert a new basic
  block and branch on the condition. However, this code does not make that
  assumption, given the simplistic uses so far.
*/
static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  SDValue lhs = Op.getOperand(0);        // comparison LHS
  SDValue rhs = Op.getOperand(1);        // comparison RHS
  SDValue trueval = Op.getOperand(2);    // value when the condition holds
  SDValue falseval = Op.getOperand(3);   // value otherwise
  SDValue condition = Op.getOperand(4);  // condition code
  // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
  // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
  // with another "cannot select select_cc" assert:
  // Re-expand as a SETCC whose mask drives SPU's bitwise select:
  SDValue compare = DAG.getNode(ISD::SETCC, VT, lhs, rhs, condition);
  return DAG.getNode(SPUISD::SELB, VT, trueval, falseval, compare);
//! Custom lower ISD::TRUNCATE
/*!
 Truncation is performed as a byte shuffle: promote the source scalar into
 a 128-bit vector, SHUFB the least-significant bytes of the wide value
 into place using the (maskHigh, maskLow) word pair, then pull the result
 out of the preferred slot.  The mask chosen depends on both the source
 width (outer switch) and destination width (inner switches, whose case
 labels are elided in this view).
 */
static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
  MVT VT = Op.getValueType();
  MVT::SimpleValueType simpleVT = VT.getSimpleVT();
  MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
  SDValue Op0 = Op.getOperand(0);
  MVT Op0VT = Op0.getValueType();
  MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
  // Get the wide source into a full register so SHUFB can address its bytes.
  SDValue PromoteScalar = DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
  // Create shuffle mask
  switch (Op0VT.getSimpleVT()) {
    // --- truncating from a quadword-wide source ---
    // least significant doubleword of quadword
    maskHigh = 0x08090a0b;
    maskLow = 0x0c0d0e0f;
    // least significant word of quadword
    maskHigh = maskLow = 0x0c0d0e0f;
    // least significant halfword of quadword
    maskHigh = maskLow = 0x0e0f0e0f;
    // least significant byte of quadword
    maskHigh = maskLow = 0x0f0f0f0f;
    cerr << "Truncation to illegal type!";
    // --- truncating from a doubleword (i64) source ---
    // least significant word of doubleword
    maskHigh = maskLow = 0x04050607;
    // least significant halfword of doubleword
    maskHigh = maskLow = 0x06070607;
    // least significant byte of doubleword
    maskHigh = maskLow = 0x07070707;
    cerr << "Truncation to illegal type!";
    // --- truncating from a word (i32) source ---
    // least significant halfword of word
    maskHigh = maskLow = 0x02030203;
    // least significant byte of word/halfword
    maskHigh = maskLow = 0x03030303;
    cerr << "Truncation to illegal type!";
  cerr << "Trying to lower truncation from illegal type!";
  // Use a shuffle to perform the truncation
  SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                 DAG.getConstant(maskHigh, MVT::i32),
                                 DAG.getConstant(maskLow, MVT::i32),
                                 DAG.getConstant(maskHigh, MVT::i32),
                                 DAG.getConstant(maskLow, MVT::i32));
  SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT,
                                     PromoteScalar, PromoteScalar, shufMask);
  return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
                     DAG.getNode(ISD::BIT_CONVERT, VecVT, truncShuffle));
//! Custom (target-specific) lowering entry point
/*!
  This is where LLVM's DAG selection process calls to do target-specific
  (The dispatching switch and several case labels are elided in this view;
  each visible arm forwards to the matching Lower* helper above.)
 */
SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
  unsigned Opc = (unsigned) Op.getOpcode();
  MVT VT = Op.getValueType();
  // Unhandled opcode: emit diagnostics (default-case label elided).
  cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
  cerr << "Op.getOpcode() = " << Opc << "\n";
  cerr << "*Op.getNode():\n";
  Op.getNode()->dump();
  return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
  return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
case ISD::ConstantPool:
  return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
case ISD::GlobalAddress:
  return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
case ISD::JumpTable:
  return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
  return LowerConstant(Op, DAG);
case ISD::ConstantFP:
  return LowerConstantFP(Op, DAG);
  return LowerBRCOND(Op, DAG);
case ISD::FORMAL_ARGUMENTS:
  return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
  return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
  return LowerRET(Op, DAG, getTargetMachine());

// i8, i64 math ops:
case ISD::ZERO_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::ANY_EXTEND:
  // Dispatch by result width (preceding `if (VT == MVT::i8)` elided):
  return LowerI8Math(Op, DAG, Opc);
  else if (VT == MVT::i64)
    return LowerI64Math(Op, DAG, Opc);

// Vector-related lowering.
case ISD::BUILD_VECTOR:
  return LowerBUILD_VECTOR(Op, DAG);
case ISD::SCALAR_TO_VECTOR:
  return LowerSCALAR_TO_VECTOR(Op, DAG);
case ISD::VECTOR_SHUFFLE:
  return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT:
  return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::INSERT_VECTOR_ELT:
  return LowerINSERT_VECTOR_ELT(Op, DAG);

// Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
  return LowerByteImmed(Op, DAG);

// Vector and i8 multiply:
  return LowerVectorMUL(Op, DAG);
  else if (VT == MVT::i8)
    return LowerI8Math(Op, DAG, Opc);
  return LowerMUL(Op, DAG, VT, Opc);

  // FDIV: f32/v4f32 use the reciprocal-estimate expansion; f64 is a libcall.
  if (VT == MVT::f32 || VT == MVT::v4f32)
    return LowerFDIVf32(Op, DAG);

  // This is probably a libcall
  else if (Op.getValueType() == MVT::f64)
    return LowerFDIVf64(Op, DAG);

  assert(0 && "Calling FDIV on unsupported MVT");
  return LowerCTPOP(Op, DAG);

case ISD::SELECT_CC:
  return LowerSELECT_CC(Op, DAG);
  return LowerTRUNCATE(Op, DAG);
//! Replace illegal node results during type legalization.
/*!
  Currently handles nothing: the visible default path just dumps a
  diagnostic and leaves Results empty, so the node is returned unchanged.
  (The switch skeleton and closing lines are elided in this view.)
 */
void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue>&Results,
  unsigned Opc = (unsigned) N->getOpcode();
  MVT OpVT = N->getValueType(0);
  // Default/unhandled opcode: report it.
  cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
  cerr << "Op.getOpcode() = " << Opc << "\n";
  cerr << "*Op.getNode():\n";
  /* Otherwise, return unchanged */
2991 //===----------------------------------------------------------------------===//
2992 // Target Optimization Hooks
2993 //===----------------------------------------------------------------------===//
/*!
  Target DAG-combine hook: peephole-folds SPU-specific node patterns
  (constant folding into SPUindirect addresses, redundant extend /
  promote / vec2prefslot pairs, degenerate shifts).  Returns the
  replacement value, or an empty SDValue to leave the node unchanged.
  (Several case labels and closing braces are elided in this view.)
 */
SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
  TargetMachine &TM = getTargetMachine();
  const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
  SelectionDAG &DAG = DCI.DAG;
  SDValue Op0 = N->getOperand(0); // everything has at least one operand
  MVT NodeVT = N->getValueType(0); // The node's value type
  MVT Op0VT = Op0.getValueType(); // The first operand's result
  SDValue Result; // Initially, empty result
  switch (N->getOpcode()) {
    // --- ISD::ADD arm (label elided): fold constants into SPUindirect ---
    SDValue Op1 = N->getOperand(1);
    if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) {
      SDValue Op01 = Op0.getOperand(1);
      if (Op01.getOpcode() == ISD::Constant
          || Op01.getOpcode() == ISD::TargetConstant) {
        // (add <const>, (SPUindirect <arg>, <const>)) ->
        // (SPUindirect <arg>, <const + const>)
        ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
        ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
        SDValue combinedConst =
          DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(), Op0VT);
        if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
            << "Replace: (add " << CN0->getZExtValue() << ", "
            << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n"
            << "With: (SPUindirect <arg>, "
            << CN0->getZExtValue() + CN1->getZExtValue() << ")\n";
        return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
                           Op0.getOperand(0), combinedConst);
    } else if (isa<ConstantSDNode>(Op0)
               && Op1.getOpcode() == SPUISD::IndirectAddr) {
      SDValue Op11 = Op1.getOperand(1);
      if (Op11.getOpcode() == ISD::Constant
          || Op11.getOpcode() == ISD::TargetConstant) {
        // (add (SPUindirect <arg>, <const>), <const>) ->
        // (SPUindirect <arg>, <const + const>)
        ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
        ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
        SDValue combinedConst =
          DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(), Op0VT);
        DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
                   << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
        DEBUG(cerr << "With: (SPUindirect <arg>, "
                   << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
        return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
                           Op1.getOperand(0), combinedConst);
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND: {
    if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
      // (any_extend (SPUextract_elt0 <arg>)) ->
      // (SPUextract_elt0 <arg>)
      // Types must match, however...
      // if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      cerr << "\nReplace: ";
      Op0.getNode()->dump(&DAG);
  case SPUISD::IndirectAddr: {
    if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
      ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
      if (CN->getZExtValue() == 0) {
        // (SPUindirect (SPUaform <addr>, 0), 0) ->
        // (SPUaform <addr>, 0)
        DEBUG(cerr << "Replace: ");
        DEBUG(N->dump(&DAG));
        DEBUG(cerr << "\nWith: ");
        DEBUG(Op0.getNode()->dump(&DAG));
        DEBUG(cerr << "\n");
  case SPUISD::SHLQUAD_L_BITS:
  case SPUISD::SHLQUAD_L_BYTES:
  case SPUISD::VEC_SHL:
  case SPUISD::VEC_SRL:
  case SPUISD::VEC_SRA:
  case SPUISD::ROTQUAD_RZ_BYTES:
  case SPUISD::ROTQUAD_RZ_BITS: {
    SDValue Op1 = N->getOperand(1);
    if (isa<ConstantSDNode>(Op1)) {
      // Kill degenerate vector shifts:
      ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
      if (CN->getZExtValue() == 0) {
  case SPUISD::PROMOTE_SCALAR: {
    switch (Op0.getOpcode()) {
    case ISD::ANY_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::SIGN_EXTEND: {
      // (SPUpromote_scalar (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
      // but only if the SPUpromote_scalar and <arg> types match.
      SDValue Op00 = Op0.getOperand(0);
      if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
        SDValue Op000 = Op00.getOperand(0);
        if (Op000.getValueType() == NodeVT) {
    case SPUISD::VEC2PREFSLOT: {
      // (SPUpromote_scalar (SPUvec2prefslot <arg>)) ->
      Result = Op0.getOperand(0);
  // Otherwise, return unchanged.
  if (Result.getNode()) {
    // Trace any replacement that was produced above:
    DEBUG(cerr << "\nReplace.SPU: ");
    DEBUG(N->dump(&DAG));
    DEBUG(cerr << "\nWith: ");
    DEBUG(Result.getNode()->dump(&DAG));
    DEBUG(cerr << "\n");
3160 //===----------------------------------------------------------------------===//
3161 // Inline Assembly Support
3162 //===----------------------------------------------------------------------===//
3164 /// getConstraintType - Given a constraint letter, return the type of
3165 /// constraint it is for this target.
3166 SPUTargetLowering::ConstraintType
3167 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
// Single-letter constraints recognized by this target classify as register
// class constraints; everything else defers to the generic handler below.
3168 if (ConstraintLetter.size() == 1) {
3169 switch (ConstraintLetter[0]) {
// NOTE(review): the case labels preceding this return are elided in this
// view; presumably they enumerate the letters classified as C_RegisterClass
// -- confirm against the full source.
3176 return C_RegisterClass;
// Unrecognized (or multi-character) constraints fall back to the
// target-independent classification.
3179 return TargetLowering::getConstraintType(ConstraintLetter);
3182 std::pair<unsigned, const TargetRegisterClass*>
// Maps a single-letter inline-asm constraint (GCC RS6000-style letters, per
// the comment below) plus the operand's value type VT to a concrete SPU
// register class; returns register number 0U (meaning "any in class").
3183 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
3186 if (Constraint.size() == 1) {
3187 // GCC RS6000 Constraint Letters
3188 switch (Constraint[0]) {
// NOTE(review): the case labels and some of the VT tests between the
// returns below are elided in this view; each return selects the register
// class for one letter/value-type combination (64-bit int, 32-bit int,
// f32, f64, and general-purpose respectively) -- confirm against the full
// source.
3192 return std::make_pair(0U, SPU::R64CRegisterClass);
3193 return std::make_pair(0U, SPU::R32CRegisterClass);
3196 return std::make_pair(0U, SPU::R32FPRegisterClass);
3197 else if (VT == MVT::f64)
3198 return std::make_pair(0U, SPU::R64FPRegisterClass);
3201 return std::make_pair(0U, SPU::GPRCRegisterClass);
// Anything not handled above is delegated to the target-independent lookup.
3205 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
3208 //! Compute used/known bits for a SPU operand
// Target hook invoked by the DAG combiner to determine which bits of
// SPU-specific nodes are known zero/one; results are OR-ed into the
// caller-supplied KnownZero/KnownOne accumulators.
3210 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
3214 const SelectionDAG &DAG,
3215 unsigned Depth ) const {
// NOTE(review): uint64_sizebits is not referenced anywhere in the visible
// lines of this function -- confirm it is used in the elided portions
// before removing.
3217 const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
3220 switch (Op.getOpcode()) {
3222 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
// A scalar promoted into a vector's preferred slot: bits outside the
// source scalar type's integer bit mask are reported known-zero.
3232 case SPUISD::PROMOTE_SCALAR: {
3233 SDValue Op0 = Op.getOperand(0);
3234 MVT Op0VT = Op0.getValueType();
3235 unsigned Op0VTBits = Op0VT.getSizeInBits();
3236 uint64_t InMask = Op0VT.getIntegerVTBitMask();
3237 KnownZero |= APInt(Op0VTBits, ~InMask, false);
// NOTE(review): OR-ing the full in-type mask into KnownOne asserts every
// in-type bit is known to be one, which contradicts the KnownZero line
// above for any real value -- this looks suspect; verify the intended
// known-bits semantics before relying on it.
3238 KnownOne |= APInt(Op0VTBits, InMask, false);
// Load results and preferred-slot extractions: the same masking applied to
// the node's own value type.
3242 case SPUISD::LDRESULT:
3243 case SPUISD::VEC2PREFSLOT: {
3244 MVT OpVT = Op.getValueType();
3245 unsigned OpVTBits = OpVT.getSizeInBits();
3246 uint64_t InMask = OpVT.getIntegerVTBitMask();
3247 KnownZero |= APInt(OpVTBits, ~InMask, false);
3248 KnownOne |= APInt(OpVTBits, InMask, false);
// Shift/rotate, select-mask, FP-estimate, and sign-extension nodes: no
// known-bit information is computed for these opcodes in the visible lines
// (their shared handling, if any, is elided from this view).
3257 case SPUISD::SHLQUAD_L_BITS:
3258 case SPUISD::SHLQUAD_L_BYTES:
3259 case SPUISD::VEC_SHL:
3260 case SPUISD::VEC_SRL:
3261 case SPUISD::VEC_SRA:
3262 case SPUISD::VEC_ROTL:
3263 case SPUISD::VEC_ROTR:
3264 case SPUISD::ROTQUAD_RZ_BYTES:
3265 case SPUISD::ROTQUAD_RZ_BITS:
3266 case SPUISD::ROTBYTES_LEFT:
3267 case SPUISD::SELECT_MASK:
3269 case SPUISD::FPInterp:
3270 case SPUISD::FPRecipEst:
3271 case SPUISD::SEXT32TO64:
3276 // LowerAsmOperandForConstraint
// Lowers an inline-asm operand for the given constraint letter, appending
// any resulting operands to Ops. The SPU target adds no letters of its own
// yet: it simply forwards to the TargetLowering base-class handler.
3278 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3279 char ConstraintLetter,
3281 std::vector<SDValue> &Ops,
3282 SelectionDAG &DAG) const {
3283 // Default, for the time being, to the base class handler
// NOTE(review): the remainder of this call's argument list (Ops, DAG) is
// elided from this view.
3284 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
3288 /// isLegalAddressImmediate - Return true if the integer value can be used
3289 /// as the offset of the target addressing mode.
3290 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3291 const Type *Ty) const {
3292 // SPU's addresses are 256K:
// Accepts offsets strictly inside (-(1<<18), (1<<18)-1), i.e. -262143 ..
// 262142 inclusive. NOTE(review): both bounds are strict, so -262144 and
// 262143 are rejected -- the range is asymmetric and slightly narrower
// than a full signed 18-bit field; confirm this matches the hardware
// addressing-mode encoding before changing it.
3293 return (V > -(1 << 18) && V < (1 << 18) - 1);
// GlobalValue overload of isLegalAddressImmediate. Its body is elided from
// this view; presumably it rejects global values as direct address
// immediates -- TODO(review): confirm against the full source.
3296 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3301 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3302 // The SPU target isn't yet aware of offsets.